Autogenerate the BCP 47 to OpenType mappings

The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.

[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry

Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.

Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.

Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.

One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.

The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
where they matched only because unrelated languages share a name. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".

gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
This commit is contained in:
David Corbett 2017-12-08 22:45:52 -05:00 committed by Behdad Esfahbod
parent 2c7d4db7af
commit 2f1f961cc0
7 changed files with 3092 additions and 838 deletions

View File

@ -291,11 +291,12 @@ GENERATORS = \
gen-emoji-table.py \ gen-emoji-table.py \
gen-indic-table.py \ gen-indic-table.py \
gen-os2-unicode-ranges.py \ gen-os2-unicode-ranges.py \
gen-tag-table.py \
gen-use-table.py \ gen-use-table.py \
$(NULL) $(NULL)
EXTRA_DIST += $(GENERATORS) EXTRA_DIST += $(GENERATORS)
unicode-tables: arabic-table indic-table use-table emoji-table unicode-tables: arabic-table indic-table tag-table use-table emoji-table
arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt arabic-table: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-arabic-table.hh \
@ -305,6 +306,10 @@ indic-table: gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategor
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-indic-table.cc \ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-indic-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-indic-table.cc; false) || ($(RM) $(srcdir)/hb-ot-shape-complex-indic-table.cc; false)
# Regenerate hb-ot-tag-table.hh: run gen-tag-table.py (the first prerequisite,
# prefixed with $(builddir)/) with the languagetags and language-subtag-registry
# files as its arguments, capturing stdout into the header in $(srcdir).
# If the generator fails, the partially written header is removed and the
# rule still reports failure (the trailing `false`).
tag-table: gen-tag-table.py languagetags language-subtag-registry
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-tag-table.hh \
|| ($(RM) $(srcdir)/hb-ot-tag-table.hh; false)
use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
@ -315,7 +320,7 @@ emoji-table: gen-emoji-table.py emoji-data.txt
built-sources: $(BUILT_SOURCES) built-sources: $(BUILT_SOURCES)
.PHONY: unicode-tables arabic-table indic-table use-table emoji-table built-sources .PHONY: unicode-tables arabic-table indic-table tag-table use-table emoji-table built-sources
RAGEL_GENERATED = \ RAGEL_GENERATED = \
$(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \ $(patsubst %,$(srcdir)/%,$(HB_BASE_RAGEL_GENERATED_sources)) \

View File

@ -38,6 +38,7 @@ HB_BASE_sources = \
hb-ot-os2-unicode-ranges.hh \ hb-ot-os2-unicode-ranges.hh \
hb-ot-post-macroman.hh \ hb-ot-post-macroman.hh \
hb-ot-post-table.hh \ hb-ot-post-table.hh \
hb-ot-tag-table.hh \
hb-ot-tag.cc \ hb-ot-tag.cc \
hb.hh \ hb.hh \
hb-set-digest.hh \ hb-set-digest.hh \

1013
src/gen-tag-table.py Executable file

File diff suppressed because it is too large Load Diff

1997
src/hb-ot-tag-table.hh Normal file

File diff suppressed because it is too large Load Diff

View File

@ -167,732 +167,6 @@ hb_ot_tag_to_script (hb_tag_t tag)
/* hb_language_t */ /* hb_language_t */
/* One row of the language table: a language code paired with the
 * OpenType language system tag(s) it maps to. */
typedef struct {
char language[4]; /* language code string; the ot_languages entries use 2- or 3-letter codes, leaving room for the NUL */
hb_tag_t tags[HB_OT_MAX_TAGS_PER_LANGUAGE]; /* OpenType tag(s) for this code; the ot_languages entries each list a single HB_TAG */
} LangTag;
/*
* Complete list at:
* https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
*
* Generated by intersecting the OpenType language tag list from
* Draft OpenType 1.5 spec, with the ISO 639-3 codes from
* 2008-08-04, matching on name, and finally adjusted manually.
*
* Updated on 2012-12-07 with more research into remaining codes.
*
* Updated on 2013-11-23 based on usage in SIL and Microsoft fonts,
* the new proposal from Microsoft, and latest ISO 639-3 names.
*
* Some items still missing. Those are commented out at the end.
* Keep sorted for bsearch.
*
* Updated as of 2015-05-06: OT1.7 on MS website has some newer
* items that we don't have here, e.g. Zazaki. These are the new
* items in OpenType 1.7 (red items), most of which we have:
* https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
*/
static const LangTag ot_languages[] = {
{"aa", {HB_TAG('A','F','R',' ')}}, /* Afar */
{"ab", {HB_TAG('A','B','K',' ')}}, /* Abkhazian */
{"abq", {HB_TAG('A','B','A',' ')}}, /* Abaza */
{"acf", {HB_TAG('F','A','N',' ')}}, /* French Antillean */
{"ach", {HB_TAG('A','C','H',' ')}}, /* Acoli */
{"acr", {HB_TAG('A','C','R',' ')}}, /* Achi */
{"ada", {HB_TAG('D','N','G',' ')}}, /* Dangme */
{"ady", {HB_TAG('A','D','Y',' ')}}, /* Adyghe */
{"af", {HB_TAG('A','F','K',' ')}}, /* Afrikaans */
{"ahg", {HB_TAG('A','G','W',' ')}}, /* Agaw */
{"aii", {HB_TAG('S','W','A',' ')}}, /* Swadaya Aramaic */
{"aio", {HB_TAG('A','I','O',' ')}}, /* Aiton */
{"aiw", {HB_TAG('A','R','I',' ')}}, /* Aari */
{"ak", {HB_TAG('T','W','I',' ')}}, /* Akan [macrolanguage] */
{"aka", {HB_TAG('A','K','A',' ')}}, /* Akan */
{"alt", {HB_TAG('A','L','T',' ')}}, /* [Southern] Altai */
{"am", {HB_TAG('A','M','H',' ')}}, /* Amharic */
{"amf", {HB_TAG('H','B','N',' ')}}, /* Hammer-Banna */
{"amw", {HB_TAG('S','Y','R',' ')}}, /* Western Neo-Aramaic */
{"an", {HB_TAG('A','R','G',' ')}}, /* Aragonese */
{"ang", {HB_TAG('A','N','G',' ')}}, /* Old English (ca. 450-1100) */
{"ar", {HB_TAG('A','R','A',' ')}}, /* Arabic [macrolanguage] */
{"arb", {HB_TAG('A','R','A',' ')}}, /* Standard Arabic */
{"arn", {HB_TAG('M','A','P',' ')}}, /* Mapudungun */
{"ary", {HB_TAG('M','O','R',' ')}}, /* Moroccan Arabic */
{"as", {HB_TAG('A','S','M',' ')}}, /* Assamese */
{"ast", {HB_TAG('A','S','T',' ')}}, /* Asturian/Asturleonese/Bable/Leonese */
{"ath", {HB_TAG('A','T','H',' ')}}, /* Athapaskan [family] */
{"atj", {HB_TAG('R','C','R',' ')}}, /* R-Cree */
{"atv", {HB_TAG('A','L','T',' ')}}, /* [Northern] Altai */
{"av", {HB_TAG('A','V','R',' ')}}, /* Avaric */
{"awa", {HB_TAG('A','W','A',' ')}}, /* Awadhi */
{"ay", {HB_TAG('A','Y','M',' ')}}, /* Aymara [macrolanguage] */
{"az", {HB_TAG('A','Z','E',' ')}}, /* Azerbaijani [macrolanguage] */
{"azb", {HB_TAG('A','Z','B',' ')}}, /* South Azerbaijani */
{"azj", {HB_TAG('A','Z','E',' ')}}, /* North Azerbaijani */
{"ba", {HB_TAG('B','S','H',' ')}}, /* Bashkir */
{"bad", {HB_TAG('B','A','D','0')}}, /* Banda */
{"bai", {HB_TAG('B','M','L',' ')}}, /* Bamileke [family] */
{"bal", {HB_TAG('B','L','I',' ')}}, /* Baluchi [macrolangauge] */
{"ban", {HB_TAG('B','A','N',' ')}}, /* Balinese */
{"bar", {HB_TAG('B','A','R',' ')}}, /* Bavarian */
{"bbc", {HB_TAG('B','B','C',' ')}}, /* Batak Toba */
{"bci", {HB_TAG('B','A','U',' ')}}, /* Baoulé */
{"bcl", {HB_TAG('B','I','K',' ')}}, /* Central Bikol */
{"bcq", {HB_TAG('B','C','H',' ')}}, /* Bench */
{"bdy", {HB_TAG('B','D','Y',' ')}}, /* Bandjalang */
{"be", {HB_TAG('B','E','L',' ')}}, /* Belarusian */
{"bem", {HB_TAG('B','E','M',' ')}}, /* Bemba (Zambia) */
{"ber", {HB_TAG('B','E','R',' ')}}, /* Berber [family] */
{"bfq", {HB_TAG('B','A','D',' ')}}, /* Badaga */
{"bft", {HB_TAG('B','L','T',' ')}}, /* Balti */
{"bfu", {HB_TAG('L','A','H',' ')}}, /* Lahuli */
{"bfy", {HB_TAG('B','A','G',' ')}}, /* Baghelkhandi */
{"bg", {HB_TAG('B','G','R',' ')}}, /* Bulgarian */
{"bgc", {HB_TAG('B','G','C',' ')}}, /* Haryanvi */
{"bgq", {HB_TAG('B','G','Q',' ')}}, /* Bagri */
{"bgr", {HB_TAG('Q','I','N',' ')}}, /* Bawm Chin */
{"bhb", {HB_TAG('B','H','I',' ')}}, /* Bhili */
{"bhk", {HB_TAG('B','I','K',' ')}}, /* Albay Bicolano (retired code) */
{"bho", {HB_TAG('B','H','O',' ')}}, /* Bhojpuri */
{"bi", {HB_TAG('B','I','S',' ')}}, /* Bislama */
{"bik", {HB_TAG('B','I','K',' ')}}, /* Bikol [macrolanguage] */
{"bin", {HB_TAG('E','D','O',' ')}}, /* Bini */
{"bjj", {HB_TAG('B','J','J',' ')}}, /* Kanauji */
{"bjt", {HB_TAG('B','L','N',' ')}}, /* Balanta-Ganja */
{"bla", {HB_TAG('B','K','F',' ')}}, /* Blackfoot */
{"ble", {HB_TAG('B','L','N',' ')}}, /* Balanta-Kentohe */
{"blk", {HB_TAG('B','L','K',' ')}}, /* Pa'O/Pa'o Karen */
{"bln", {HB_TAG('B','I','K',' ')}}, /* Southern Catanduanes Bikol */
{"bm", {HB_TAG('B','M','B',' ')}}, /* Bambara */
{"bn", {HB_TAG('B','E','N',' ')}}, /* Bengali */
{"bo", {HB_TAG('T','I','B',' ')}}, /* Tibetan */
{"bpy", {HB_TAG('B','P','Y',' ')}}, /* Bishnupriya */
{"bqi", {HB_TAG('L','R','C',' ')}}, /* Bakhtiari */
{"br", {HB_TAG('B','R','E',' ')}}, /* Breton */
{"bra", {HB_TAG('B','R','I',' ')}}, /* Braj Bhasha */
{"brh", {HB_TAG('B','R','H',' ')}}, /* Brahui */
{"brx", {HB_TAG('B','R','X',' ')}}, /* Bodo (India) */
{"bs", {HB_TAG('B','O','S',' ')}}, /* Bosnian */
{"btb", {HB_TAG('B','T','I',' ')}}, /* Beti (Cameroon) */
{"bto", {HB_TAG('B','I','K',' ')}}, /* Rinconada Bikol */
{"bts", {HB_TAG('B','T','S',' ')}}, /* Batak Simalungun */
{"bug", {HB_TAG('B','U','G',' ')}}, /* Buginese */
{"bxr", {HB_TAG('R','B','U',' ')}}, /* Russian Buriat */
{"byn", {HB_TAG('B','I','L',' ')}}, /* Bilen */
{"ca", {HB_TAG('C','A','T',' ')}}, /* Catalan */
{"cak", {HB_TAG('C','A','K',' ')}}, /* Kaqchikel */
{"cbk", {HB_TAG('C','B','K',' ')}}, /* Chavacano */
{"cbl", {HB_TAG('Q','I','N',' ')}}, /* Bualkhaw Chin */
{"cco", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"ce", {HB_TAG('C','H','E',' ')}}, /* Chechen */
{"ceb", {HB_TAG('C','E','B',' ')}}, /* Cebuano */
{"cfm", {HB_TAG('H','A','L',' ')}}, /* Halam/Falam Chin */
{"cgg", {HB_TAG('C','G','G',' ')}}, /* Chiga */
{"ch", {HB_TAG('C','H','A',' ')}}, /* Chamorro */
{"chj", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"chk", {HB_TAG('C','H','K','0')}}, /* Chuukese */
{"cho", {HB_TAG('C','H','O',' ')}}, /* Choctaw */
{"chp", {HB_TAG('C','H','P',' ')}}, /* Chipewyan */
{"chq", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"chr", {HB_TAG('C','H','R',' ')}}, /* Cherokee */
{"chy", {HB_TAG('C','H','Y',' ')}}, /* Cheyenne */
{"chz", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cja", {HB_TAG('C','J','A',' ')}}, /* Western Cham */
{"cjm", {HB_TAG('C','J','M',' ')}}, /* Eastern Cham */
{"cka", {HB_TAG('Q','I','N',' ')}}, /* Khumi Awa Chin */
{"ckb", {HB_TAG('K','U','R',' ')}}, /* Central Kurdish (Sorani) */
{"ckt", {HB_TAG('C','H','K',' ')}}, /* Chukchi */
{"cld", {HB_TAG('S','Y','R',' ')}}, /* Chaldean Neo-Aramaic */
{"cle", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cmr", {HB_TAG('Q','I','N',' ')}}, /* Mro-Khimi Chin */
{"cnb", {HB_TAG('Q','I','N',' ')}}, /* Chinbon Chin */
{"cnh", {HB_TAG('Q','I','N',' ')}}, /* Hakha Chin */
{"cnk", {HB_TAG('Q','I','N',' ')}}, /* Khumi Chin */
{"cnl", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cnt", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cnw", {HB_TAG('Q','I','N',' ')}}, /* Ngawn Chin */
{"cop", {HB_TAG('C','O','P',' ')}}, /* Coptic */
{"cpa", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cpp", {HB_TAG('C','P','P',' ')}}, /* Creoles */
{"cr", {HB_TAG('C','R','E',' ')}}, /* Cree */
{"cre", {HB_TAG('Y','C','R',' ')}}, /* Y-Cree */
{"crh", {HB_TAG('C','R','T',' ')}}, /* Crimean Tatar */
{"crj", {HB_TAG('E','C','R',' ')}}, /* [Southern] East Cree */
{"crk", {HB_TAG('W','C','R',' ')}}, /* West-Cree */
{"crl", {HB_TAG('E','C','R',' ')}}, /* [Northern] East Cree */
{"crm", {HB_TAG('M','C','R',' ')}}, /* Moose Cree */
{"crx", {HB_TAG('C','R','R',' ')}}, /* Carrier */
{"cs", {HB_TAG('C','S','Y',' ')}}, /* Czech */
{"csa", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"csb", {HB_TAG('C','S','B',' ')}}, /* Kashubian */
{"csh", {HB_TAG('Q','I','N',' ')}}, /* Asho Chin */
{"cso", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"csy", {HB_TAG('Q','I','N',' ')}}, /* Siyin Chin */
{"ctd", {HB_TAG('Q','I','N',' ')}}, /* Tedim Chin */
{"cte", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"ctg", {HB_TAG('C','T','G',' ')}}, /* Chittagonian */
{"ctl", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cts", {HB_TAG('B','I','K',' ')}}, /* Northern Catanduanes Bikol */
{"cu", {HB_TAG('C','S','L',' ')}}, /* Church Slavic */
{"cuc", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cuk", {HB_TAG('C','U','K',' ')}}, /* San Blas Kuna */
{"cv", {HB_TAG('C','H','U',' ')}}, /* Chuvash */
{"cvn", {HB_TAG('C','C','H','N')}}, /* Chinantec */
{"cwd", {HB_TAG('D','C','R',' ')}}, /* Woods Cree */
{"cy", {HB_TAG('W','E','L',' ')}}, /* Welsh */
{"czt", {HB_TAG('Q','I','N',' ')}}, /* Zotung Chin */
{"da", {HB_TAG('D','A','N',' ')}}, /* Danish */
{"dao", {HB_TAG('Q','I','N',' ')}}, /* Daai Chin */
{"dap", {HB_TAG('N','I','S',' ')}}, /* Nisi (India) */
{"dar", {HB_TAG('D','A','R',' ')}}, /* Dargwa */
{"dax", {HB_TAG('D','A','X',' ')}}, /* Dayi */
{"de", {HB_TAG('D','E','U',' ')}}, /* German */
{"dgo", {HB_TAG('D','G','O',' ')}}, /* Dogri */
{"dhd", {HB_TAG('M','A','W',' ')}}, /* Dhundari */
{"dhg", {HB_TAG('D','H','G',' ')}}, /* Dhangu */
{"din", {HB_TAG('D','N','K',' ')}}, /* Dinka [macrolanguage] */
{"diq", {HB_TAG('D','I','Q',' ')}}, /* Dimli */
{"dje", {HB_TAG('D','J','R',' ')}}, /* Zarma */
{"djr", {HB_TAG('D','J','R','0')}}, /* Djambarrpuyngu */
{"dng", {HB_TAG('D','U','N',' ')}}, /* Dungan */
{"dnj", {HB_TAG('D','N','J',' ')}}, /* Dan */
{"doi", {HB_TAG('D','G','R',' ')}}, /* Dogri [macrolanguage] */
{"dsb", {HB_TAG('L','S','B',' ')}}, /* Lower Sorbian */
{"duj", {HB_TAG('D','U','J',' ')}}, /* Dhuwal */
{"dv", {HB_TAG('D','I','V',' ')}}, /* Dhivehi/Divehi/Maldivian */
{"dyu", {HB_TAG('J','U','L',' ')}}, /* Jula */
{"dz", {HB_TAG('D','Z','N',' ')}}, /* Dzongkha */
{"ee", {HB_TAG('E','W','E',' ')}}, /* Ewe */
{"efi", {HB_TAG('E','F','I',' ')}}, /* Efik */
{"ekk", {HB_TAG('E','T','I',' ')}}, /* Standard Estonian */
{"el", {HB_TAG('E','L','L',' ')}}, /* Modern Greek (1453-) */
{"emk", {HB_TAG('M','N','K',' ')}}, /* Eastern Maninkakan */
{"en", {HB_TAG('E','N','G',' ')}}, /* English */
{"enf", {HB_TAG('F','N','E',' ')}}, /* Forest Nenets */
{"enh", {HB_TAG('T','N','E',' ')}}, /* Tundra Nenets */
{"eo", {HB_TAG('N','T','O',' ')}}, /* Esperanto */
{"eot", {HB_TAG('B','T','I',' ')}}, /* Beti (Côte d'Ivoire) */
{"es", {HB_TAG('E','S','P',' ')}}, /* Spanish */
{"esu", {HB_TAG('E','S','U',' ')}}, /* Central Yupik */
{"et", {HB_TAG('E','T','I',' ')}}, /* Estonian [macrolanguage] */
{"eu", {HB_TAG('E','U','Q',' ')}}, /* Basque */
{"eve", {HB_TAG('E','V','N',' ')}}, /* Even */
{"evn", {HB_TAG('E','V','K',' ')}}, /* Evenki */
{"fa", {HB_TAG('F','A','R',' ')}}, /* Persian [macrolanguage] */
{"fan", {HB_TAG('F','A','N','0')}}, /* Fang */
{"fat", {HB_TAG('F','A','T',' ')}}, /* Fanti */
{"ff", {HB_TAG('F','U','L',' ')}}, /* Fulah [macrolanguage] */
{"fi", {HB_TAG('F','I','N',' ')}}, /* Finnish */
{"fil", {HB_TAG('P','I','L',' ')}}, /* Filipino */
{"fj", {HB_TAG('F','J','I',' ')}}, /* Fijian */
{"flm", {HB_TAG('H','A','L',' ')}}, /* Halam/Falam Chin [retired ISO639 code] */
{"fo", {HB_TAG('F','O','S',' ')}}, /* Faroese */
{"fon", {HB_TAG('F','O','N',' ')}}, /* Fon */
{"fr", {HB_TAG('F','R','A',' ')}}, /* French */
{"frc", {HB_TAG('F','R','C',' ')}}, /* Cajun French */
{"frp", {HB_TAG('F','R','P',' ')}}, /* Arpitan/Francoprovençal */
{"fuf", {HB_TAG('F','T','A',' ')}}, /* Futa */
{"fur", {HB_TAG('F','R','L',' ')}}, /* Friulian */
{"fuv", {HB_TAG('F','U','V',' ')}}, /* Nigerian Fulfulde */
{"fy", {HB_TAG('F','R','I',' ')}}, /* Western Frisian */
{"ga", {HB_TAG('I','R','I',' ')}}, /* Irish */
{"gaa", {HB_TAG('G','A','D',' ')}}, /* Ga */
{"gag", {HB_TAG('G','A','G',' ')}}, /* Gagauz */
{"gbm", {HB_TAG('G','A','W',' ')}}, /* Garhwali */
{"gd", {HB_TAG('G','A','E',' ')}}, /* Scottish Gaelic */
{"gez", {HB_TAG('G','E','Z',' ')}}, /* Ge'ez */
{"ggo", {HB_TAG('G','O','N',' ')}}, /* Southern Gondi */
{"gih", {HB_TAG('G','I','H',' ')}}, /* Githabul */
{"gil", {HB_TAG('G','I','L','0')}}, /* Kiribati (Gilbertese) */
{"gkp", {HB_TAG('G','K','P',' ')}}, /* Kpelle (Guinea) */
{"gl", {HB_TAG('G','A','L',' ')}}, /* Galician */
{"gld", {HB_TAG('N','A','N',' ')}}, /* Nanai */
{"glk", {HB_TAG('G','L','K',' ')}}, /* Gilaki */
{"gn", {HB_TAG('G','U','A',' ')}}, /* Guarani [macrolanguage] */
{"gnn", {HB_TAG('G','N','N',' ')}}, /* Gumatj */
{"gno", {HB_TAG('G','O','N',' ')}}, /* Northern Gondi */
{"gog", {HB_TAG('G','O','G',' ')}}, /* Gogo */
{"gon", {HB_TAG('G','O','N',' ')}}, /* Gondi [macrolanguage] */
{"grt", {HB_TAG('G','R','O',' ')}}, /* Garo */
{"gru", {HB_TAG('S','O','G',' ')}}, /* Sodo Gurage */
{"gsw", {HB_TAG('A','L','S',' ')}}, /* Alsatian */
{"gu", {HB_TAG('G','U','J',' ')}}, /* Gujarati */
{"guc", {HB_TAG('G','U','C',' ')}}, /* Wayuu */
{"guf", {HB_TAG('G','U','F',' ')}}, /* Gupapuyngu */
{"guk", {HB_TAG('G','M','Z',' ')}}, /* Gumuz */
/*{"guk", {HB_TAG('G','U','K',' ')}},*/ /* Gumuz (in SIL fonts) */
{"guz", {HB_TAG('G','U','Z',' ')}}, /* Ekegusii/Gusii */
{"gv", {HB_TAG('M','N','X',' ')}}, /* Manx */
{"ha", {HB_TAG('H','A','U',' ')}}, /* Hausa */
{"har", {HB_TAG('H','R','I',' ')}}, /* Harari */
{"haw", {HB_TAG('H','A','W',' ')}}, /* Hawaiian */
{"hay", {HB_TAG('H','A','Y',' ')}}, /* Haya */
{"haz", {HB_TAG('H','A','Z',' ')}}, /* Hazaragi */
{"he", {HB_TAG('I','W','R',' ')}}, /* Hebrew */
{"hi", {HB_TAG('H','I','N',' ')}}, /* Hindi */
{"hil", {HB_TAG('H','I','L',' ')}}, /* Hiligaynon */
{"hlt", {HB_TAG('Q','I','N',' ')}}, /* Matu Chin */
{"hmn", {HB_TAG('H','M','N',' ')}}, /* Hmong */
{"hnd", {HB_TAG('H','N','D',' ')}}, /* [Southern] Hindko */
{"hne", {HB_TAG('C','H','H',' ')}}, /* Chattisgarhi */
{"hno", {HB_TAG('H','N','D',' ')}}, /* [Northern] Hindko */
{"ho", {HB_TAG('H','M','O',' ')}}, /* Hiri Motu */
{"hoc", {HB_TAG('H','O',' ',' ')}}, /* Ho */
{"hoj", {HB_TAG('H','A','R',' ')}}, /* Harauti */
{"hr", {HB_TAG('H','R','V',' ')}}, /* Croatian */
{"hsb", {HB_TAG('U','S','B',' ')}}, /* Upper Sorbian */
{"ht", {HB_TAG('H','A','I',' ')}}, /* Haitian/Haitian Creole */
{"hu", {HB_TAG('H','U','N',' ')}}, /* Hungarian */
{"hy", {HB_TAG('H','Y','E',' ')}}, /* Armenian */
{"hz", {HB_TAG('H','E','R',' ')}}, /* Herero */
{"ia", {HB_TAG('I','N','A',' ')}}, /* Interlingua (International Auxiliary Language Association) */
{"iba", {HB_TAG('I','B','A',' ')}}, /* Iban */
{"ibb", {HB_TAG('I','B','B',' ')}}, /* Ibibio */
{"id", {HB_TAG('I','N','D',' ')}}, /* Indonesian */
{"ie", {HB_TAG('I','L','E',' ')}}, /* Interlingue/Occidental */
{"ig", {HB_TAG('I','B','O',' ')}}, /* Igbo */
{"igb", {HB_TAG('E','B','I',' ')}}, /* Ebira */
{"ii", {HB_TAG('Y','I','M',' ')}}, /* Yi Modern */
{"ijc", {HB_TAG('I','J','O',' ')}}, /* Izon */
{"ijo", {HB_TAG('I','J','O',' ')}}, /* Ijo [family] */
{"ik", {HB_TAG('I','P','K',' ')}}, /* Inupiaq [macrolanguage] */
{"ilo", {HB_TAG('I','L','O',' ')}}, /* Ilokano */
{"inh", {HB_TAG('I','N','G',' ')}}, /* Ingush */
{"io", {HB_TAG('I','D','O',' ')}}, /* Ido */
{"is", {HB_TAG('I','S','L',' ')}}, /* Icelandic */
{"it", {HB_TAG('I','T','A',' ')}}, /* Italian */
{"iu", {HB_TAG('I','N','U',' ')}}, /* Inuktitut [macrolanguage] */
{"ja", {HB_TAG('J','A','N',' ')}}, /* Japanese */
{"jam", {HB_TAG('J','A','M',' ')}}, /* Jamaican Creole English */
{"jbo", {HB_TAG('J','B','O',' ')}}, /* Lojban */
{"jv", {HB_TAG('J','A','V',' ')}}, /* Javanese */
{"ka", {HB_TAG('K','A','T',' ')}}, /* Georgian */
{"kaa", {HB_TAG('K','R','K',' ')}}, /* Karakalpak */
{"kab", {HB_TAG('K','A','B','0')}}, /* Kabyle */
{"kam", {HB_TAG('K','M','B',' ')}}, /* Kamba (Kenya) */
{"kar", {HB_TAG('K','R','N',' ')}}, /* Karen [family] */
{"kat", {HB_TAG('K','G','E',' ')}}, /* Khutsuri Georgian */
{"kbd", {HB_TAG('K','A','B',' ')}}, /* Kabardian */
{"kde", {HB_TAG('K','D','E',' ')}}, /* Makonde */
{"kdr", {HB_TAG('K','R','M',' ')}}, /* Karaim */
{"kdt", {HB_TAG('K','U','Y',' ')}}, /* Kuy */
{"kea", {HB_TAG('K','E','A',' ')}}, /* Kabuverdianu (Crioulo) */
{"kek", {HB_TAG('K','E','K',' ')}}, /* Kekchi */
{"kex", {HB_TAG('K','K','N',' ')}}, /* Kokni */
{"kfa", {HB_TAG('K','O','D',' ')}}, /* Kodagu */
{"kfr", {HB_TAG('K','A','C',' ')}}, /* Kachchi */
{"kfx", {HB_TAG('K','U','L',' ')}}, /* Kulvi */
{"kfy", {HB_TAG('K','M','N',' ')}}, /* Kumaoni */
{"kg", {HB_TAG('K','O','N',' ')}}, /* Kongo [macrolanguage] */
{"kha", {HB_TAG('K','S','I',' ')}}, /* Khasi */
{"khb", {HB_TAG('X','B','D',' ')}}, /* Lü */
{"kht", {HB_TAG('K','H','N',' ')}}, /* Khamti (Microsoft fonts) */
/*{"kht", {HB_TAG('K','H','T',' ')}},*/ /* Khamti (OpenType spec and SIL fonts) */
{"khw", {HB_TAG('K','H','W',' ')}}, /* Khowar */
{"ki", {HB_TAG('K','I','K',' ')}}, /* Gikuyu/Kikuyu */
{"kiu", {HB_TAG('K','I','U',' ')}}, /* Kirmanjki */
{"kj", {HB_TAG('K','U','A',' ')}}, /* Kuanyama/Kwanyama */
{"kjd", {HB_TAG('K','J','D',' ')}}, /* Southern Kiwai */
{"kjh", {HB_TAG('K','H','A',' ')}}, /* Khakass */
{"kjp", {HB_TAG('K','J','P',' ')}}, /* Pwo Eastern Karen */
{"kk", {HB_TAG('K','A','Z',' ')}}, /* Kazakh */
{"kl", {HB_TAG('G','R','N',' ')}}, /* Kalaallisut */
{"kln", {HB_TAG('K','A','L',' ')}}, /* Kalenjin */
{"km", {HB_TAG('K','H','M',' ')}}, /* Central Khmer */
{"kmb", {HB_TAG('M','B','N',' ')}}, /* Kimbundu */
{"kmw", {HB_TAG('K','M','O',' ')}}, /* Komo (Democratic Republic of Congo) */
{"kn", {HB_TAG('K','A','N',' ')}}, /* Kannada */
{"knn", {HB_TAG('K','O','K',' ')}}, /* Konkani */
{"ko", {HB_TAG('K','O','R',' ')}}, /* Korean */
{"koi", {HB_TAG('K','O','P',' ')}}, /* Komi-Permyak */
{"kok", {HB_TAG('K','O','K',' ')}}, /* Konkani [macrolanguage] */
{"kon", {HB_TAG('K','O','N','0')}}, /* Kongo */
{"kos", {HB_TAG('K','O','S',' ')}}, /* Kosraean */
{"kpe", {HB_TAG('K','P','L',' ')}}, /* Kpelle [macrolanguage] */
{"kpv", {HB_TAG('K','O','Z',' ')}}, /* Komi-Zyrian */
{"kpy", {HB_TAG('K','Y','K',' ')}}, /* Koryak */
{"kqy", {HB_TAG('K','R','T',' ')}}, /* Koorete */
{"kr", {HB_TAG('K','N','R',' ')}}, /* Kanuri [macrolanguage] */
{"kri", {HB_TAG('K','R','I',' ')}}, /* Krio */
{"krl", {HB_TAG('K','R','L',' ')}}, /* Karelian */
{"kru", {HB_TAG('K','U','U',' ')}}, /* Kurukh */
{"ks", {HB_TAG('K','S','H',' ')}}, /* Kashmiri */
{"ksh", {HB_TAG('K','S','H','0')}}, /* Ripuarian, Kölsch */
/*{"ksw", {HB_TAG('K','R','N',' ')}},*/ /* S'gaw Karen (Microsoft fonts?) */
{"ksw", {HB_TAG('K','S','W',' ')}}, /* S'gaw Karen (OpenType spec and SIL fonts) */
{"ktb", {HB_TAG('K','E','B',' ')}}, /* Kebena */
{"ktu", {HB_TAG('K','O','N',' ')}}, /* Kikongo */
{"ku", {HB_TAG('K','U','R',' ')}}, /* Kurdish [macrolanguage] */
{"kum", {HB_TAG('K','U','M',' ')}}, /* Kumyk */
{"kv", {HB_TAG('K','O','M',' ')}}, /* Komi [macrolanguage] */
{"kvd", {HB_TAG('K','U','I',' ')}}, /* Kui (Indonesia) */
{"kw", {HB_TAG('C','O','R',' ')}}, /* Cornish */
{"kxc", {HB_TAG('K','M','S',' ')}}, /* Komso */
{"kxu", {HB_TAG('K','U','I',' ')}}, /* Kui (India) */
{"ky", {HB_TAG('K','I','R',' ')}}, /* Kirghiz/Kyrgyz */
{"kyu", {HB_TAG('K','Y','U',' ')}}, /* Western Kayah */
{"la", {HB_TAG('L','A','T',' ')}}, /* Latin */
{"lad", {HB_TAG('J','U','D',' ')}}, /* Ladino */
{"lb", {HB_TAG('L','T','Z',' ')}}, /* Luxembourgish */
{"lbe", {HB_TAG('L','A','K',' ')}}, /* Lak */
{"lbj", {HB_TAG('L','D','K',' ')}}, /* Ladakhi */
{"lez", {HB_TAG('L','E','Z',' ')}}, /* Lezgi */
{"lg", {HB_TAG('L','U','G',' ')}}, /* Ganda */
{"li", {HB_TAG('L','I','M',' ')}}, /* Limburgan/Limburger/Limburgish */
{"lif", {HB_TAG('L','M','B',' ')}}, /* Limbu */
{"lij", {HB_TAG('L','I','J',' ')}}, /* Ligurian */
{"lis", {HB_TAG('L','I','S',' ')}}, /* Lisu */
{"ljp", {HB_TAG('L','J','P',' ')}}, /* Lampung Api */
{"lki", {HB_TAG('L','K','I',' ')}}, /* Laki */
{"lld", {HB_TAG('L','A','D',' ')}}, /* Ladin */
{"lmn", {HB_TAG('L','A','M',' ')}}, /* Lambani */
{"lmo", {HB_TAG('L','M','O',' ')}}, /* Lombard */
{"ln", {HB_TAG('L','I','N',' ')}}, /* Lingala */
{"lo", {HB_TAG('L','A','O',' ')}}, /* Lao */
{"lom", {HB_TAG('L','O','M',' ')}}, /* Loma */
{"lrc", {HB_TAG('L','R','C',' ')}}, /* Northern Luri */
{"lt", {HB_TAG('L','T','H',' ')}}, /* Lithuanian */
{"lu", {HB_TAG('L','U','B',' ')}}, /* Luba-Katanga */
{"lua", {HB_TAG('L','U','B',' ')}}, /* Luba-Kasai */
{"luo", {HB_TAG('L','U','O',' ')}}, /* Luo (Kenya and Tanzania) */
{"lus", {HB_TAG('M','I','Z',' ')}}, /* Mizo */
{"luy", {HB_TAG('L','U','H',' ')}}, /* Luyia/Oluluyia [macrolanguage] */
{"luz", {HB_TAG('L','R','C',' ')}}, /* Southern Luri */
{"lv", {HB_TAG('L','V','I',' ')}}, /* Latvian */
{"lzz", {HB_TAG('L','A','Z',' ')}}, /* Laz */
{"mad", {HB_TAG('M','A','D',' ')}}, /* Madurese */
{"mag", {HB_TAG('M','A','G',' ')}}, /* Magahi */
{"mai", {HB_TAG('M','T','H',' ')}}, /* Maithili */
{"mak", {HB_TAG('M','K','R',' ')}}, /* Makasar */
{"mam", {HB_TAG('M','A','M',' ')}}, /* Mam */
{"man", {HB_TAG('M','N','K',' ')}}, /* Manding/Mandingo [macrolanguage] */
{"mdc", {HB_TAG('M','L','E',' ')}}, /* Male (Papua New Guinea) */
{"mdf", {HB_TAG('M','O','K',' ')}}, /* Moksha */
{"mdr", {HB_TAG('M','D','R',' ')}}, /* Mandar */
{"mdy", {HB_TAG('M','L','E',' ')}}, /* Male (Ethiopia) */
{"men", {HB_TAG('M','D','E',' ')}}, /* Mende (Sierra Leone) */
{"mer", {HB_TAG('M','E','R',' ')}}, /* Meru */
{"mfe", {HB_TAG('M','F','E',' ')}}, /* Morisyen */
{"mg", {HB_TAG('M','L','G',' ')}}, /* Malagasy [macrolanguage] */
{"mh", {HB_TAG('M','A','H',' ')}}, /* Marshallese */
{"mhr", {HB_TAG('L','M','A',' ')}}, /* Low Mari */
{"mi", {HB_TAG('M','R','I',' ')}}, /* Maori */
{"min", {HB_TAG('M','I','N',' ')}}, /* Minangkabau */
{"mk", {HB_TAG('M','K','D',' ')}}, /* Macedonian */
{"mku", {HB_TAG('M','N','K',' ')}}, /* Konyanka Maninka */
{"mkw", {HB_TAG('M','K','W',' ')}}, /* Kituba (Congo) */
{"ml", {HB_TAG('M','L','R',' ')}}, /* Malayalam */
{"mlq", {HB_TAG('M','N','K',' ')}}, /* Western Maninkakan */
{"mn", {HB_TAG('M','N','G',' ')}}, /* Mongolian [macrolanguage] */
{"mnc", {HB_TAG('M','C','H',' ')}}, /* Manchu */
{"mni", {HB_TAG('M','N','I',' ')}}, /* Manipuri */
{"mnk", {HB_TAG('M','N','D',' ')}}, /* Mandinka */
{"mns", {HB_TAG('M','A','N',' ')}}, /* Mansi */
{"mnw", {HB_TAG('M','O','N',' ')}}, /* Mon */
{"mo", {HB_TAG('M','O','L',' ')}}, /* Moldavian */
{"moh", {HB_TAG('M','O','H',' ')}}, /* Mohawk */
{"mos", {HB_TAG('M','O','S',' ')}}, /* Mossi */
{"mpe", {HB_TAG('M','A','J',' ')}}, /* Majang */
{"mr", {HB_TAG('M','A','R',' ')}}, /* Marathi */
{"mrh", {HB_TAG('Q','I','N',' ')}}, /* Mara Chin */
{"mrj", {HB_TAG('H','M','A',' ')}}, /* High Mari */
{"ms", {HB_TAG('M','L','Y',' ')}}, /* Malay [macrolanguage] */
{"msc", {HB_TAG('M','N','K',' ')}}, /* Sankaran Maninka */
{"mt", {HB_TAG('M','T','S',' ')}}, /* Maltese */
{"mtr", {HB_TAG('M','A','W',' ')}}, /* Mewari */
{"mus", {HB_TAG('M','U','S',' ')}}, /* Creek */
{"mve", {HB_TAG('M','A','W',' ')}}, /* Marwari (Pakistan) */
{"mwk", {HB_TAG('M','N','K',' ')}}, /* Kita Maninkakan */
{"mwl", {HB_TAG('M','W','L',' ')}}, /* Mirandese */
{"mwr", {HB_TAG('M','A','W',' ')}}, /* Marwari [macrolanguage] */
{"mww", {HB_TAG('M','W','W',' ')}}, /* Hmong Daw */
{"my", {HB_TAG('B','R','M',' ')}}, /* Burmese */
{"mym", {HB_TAG('M','E','N',' ')}}, /* Me'en */
{"myn", {HB_TAG('M','Y','N',' ')}}, /* Mayan */
{"myq", {HB_TAG('M','N','K',' ')}}, /* Forest Maninka (retired code) */
{"myv", {HB_TAG('E','R','Z',' ')}}, /* Erzya */
{"mzn", {HB_TAG('M','Z','N',' ')}}, /* Mazanderani */
{"na", {HB_TAG('N','A','U',' ')}}, /* Nauru */
{"nag", {HB_TAG('N','A','G',' ')}}, /* Naga-Assamese */
{"nah", {HB_TAG('N','A','H',' ')}}, /* Nahuatl [family] */
{"nap", {HB_TAG('N','A','P',' ')}}, /* Neapolitan */
{"nb", {HB_TAG('N','O','R',' ')}}, /* Norwegian Bokmål */
{"nco", {HB_TAG('S','I','B',' ')}}, /* Sibe */
{"nd", {HB_TAG('N','D','B',' ')}}, /* [North] Ndebele */
{"ndc", {HB_TAG('N','D','C',' ')}}, /* Ndau */
{"nds", {HB_TAG('N','D','S',' ')}}, /* Low German/Low Saxon */
{"ne", {HB_TAG('N','E','P',' ')}}, /* Nepali */
{"new", {HB_TAG('N','E','W',' ')}}, /* Newari */
{"ng", {HB_TAG('N','D','G',' ')}}, /* Ndonga */
{"nga", {HB_TAG('N','G','A',' ')}}, /* Ngabaka */
{"ngl", {HB_TAG('L','M','W',' ')}}, /* Lomwe */
{"ngo", {HB_TAG('S','X','T',' ')}}, /* Sutu */
{"niu", {HB_TAG('N','I','U',' ')}}, /* Niuean */
{"niv", {HB_TAG('G','I','L',' ')}}, /* Gilyak */
{"nl", {HB_TAG('N','L','D',' ')}}, /* Dutch */
{"nn", {HB_TAG('N','Y','N',' ')}}, /* Norwegian Nynorsk */
{"no", {HB_TAG('N','O','R',' ')}}, /* Norwegian [macrolanguage] */
{"nod", {HB_TAG('N','T','A',' ')}}, /* Northern Thai */
{"noe", {HB_TAG('N','O','E',' ')}}, /* Nimadi */
{"nog", {HB_TAG('N','O','G',' ')}}, /* Nogai */
{"nov", {HB_TAG('N','O','V',' ')}}, /* Novial */
{"nqo", {HB_TAG('N','K','O',' ')}}, /* N'Ko */
{"nr", {HB_TAG('N','D','B',' ')}}, /* [South] Ndebele */
{"nsk", {HB_TAG('N','A','S',' ')}}, /* Naskapi */
{"nso", {HB_TAG('S','O','T',' ')}}, /* [Northern] Sotho */
{"nv", {HB_TAG('N','A','V',' ')}}, /* Navajo */
{"ny", {HB_TAG('C','H','I',' ')}}, /* Chewa/Chichwa/Nyanja */
{"nym", {HB_TAG('N','Y','M',' ')}}, /* Nyamwezi */
{"nyn", {HB_TAG('N','K','L',' ')}}, /* Nyankole */
{"oc", {HB_TAG('O','C','I',' ')}}, /* Occitan (post 1500) */
{"oj", {HB_TAG('O','J','B',' ')}}, /* Ojibwa [macrolanguage] */
{"ojs", {HB_TAG('O','C','R',' ')}}, /* Oji-Cree */
{"okm", {HB_TAG('K','O','H',' ')}}, /* Korean Old Hangul */
{"om", {HB_TAG('O','R','O',' ')}}, /* Oromo [macrolanguage] */
{"or", {HB_TAG('O','R','I',' ')}}, /* Oriya */
{"os", {HB_TAG('O','S','S',' ')}}, /* Ossetian */
{"pa", {HB_TAG('P','A','N',' ')}}, /* Panjabi */
{"pag", {HB_TAG('P','A','G',' ')}}, /* Pangasinan */
{"pam", {HB_TAG('P','A','M',' ')}}, /* Kapampangan/Pampanga */
{"pap", {HB_TAG('P','A','P','0')}}, /* Papiamento */
{"pau", {HB_TAG('P','A','U',' ')}}, /* Palauan */
{"pcc", {HB_TAG('P','C','C',' ')}}, /* Bouyei */
{"pcd", {HB_TAG('P','C','D',' ')}}, /* Picard */
{"pce", {HB_TAG('P','L','G',' ')}}, /* [Ruching] Palaung */
{"pck", {HB_TAG('Q','I','N',' ')}}, /* Paite Chin */
{"pdc", {HB_TAG('P','D','C',' ')}}, /* Pennsylvania German */
{"pes", {HB_TAG('F','A','R',' ')}}, /* Iranian Persian */
{"phk", {HB_TAG('P','H','K',' ')}}, /* Phake */
{"pi", {HB_TAG('P','A','L',' ')}}, /* Pali */
{"pih", {HB_TAG('P','I','H',' ')}}, /* Pitcairn-Norfolk */
{"pl", {HB_TAG('P','L','K',' ')}}, /* Polish */
{"pll", {HB_TAG('P','L','G',' ')}}, /* [Shwe] Palaung */
{"plp", {HB_TAG('P','A','P',' ')}}, /* Palpa */
{"pms", {HB_TAG('P','M','S',' ')}}, /* Piemontese */
{"pnb", {HB_TAG('P','N','B',' ')}}, /* Western Panjabi */
{"poh", {HB_TAG('P','O','H',' ')}}, /* Pocomchi */
{"pon", {HB_TAG('P','O','N',' ')}}, /* Pohnpeian */
{"prs", {HB_TAG('D','R','I',' ')}}, /* Afghan Persian/Dari */
{"ps", {HB_TAG('P','A','S',' ')}}, /* Pashto/Pushto [macrolanguage] */
{"pt", {HB_TAG('P','T','G',' ')}}, /* Portuguese */
{"pwo", {HB_TAG('P','W','O',' ')}}, /* Pwo Western Karen */
{"qu", {HB_TAG('Q','U','Z',' ')}}, /* Quechua [macrolanguage] */
{"quc", {HB_TAG('Q','U','C',' ')}}, /* K'iche'/Quiché */
{"quh", {HB_TAG('Q','U','H',' ')}}, /* Quechua (Bolivia) */
{"quz", {HB_TAG('Q','U','Z',' ')}}, /* Cusco Quechua */
{"qvi", {HB_TAG('Q','V','I',' ')}}, /* Quechua (Ecuador) */
{"qwh", {HB_TAG('Q','W','H',' ')}}, /* Quechua (Peru) */
{"raj", {HB_TAG('R','A','J',' ')}}, /* Rajasthani [macrolanguage] */
{"rar", {HB_TAG('R','A','R',' ')}}, /* Rarotongan */
{"rbb", {HB_TAG('P','L','G',' ')}}, /* Rumai Palaung */
{"rej", {HB_TAG('R','E','J',' ')}}, /* Rejang */
{"ria", {HB_TAG('R','I','A',' ')}}, /* Riang (India) */
{"rif", {HB_TAG('R','I','F',' ')}}, /* Tarifit */
{"ril", {HB_TAG('R','I','A',' ')}}, /* Riang (Myanmar) */
{"rit", {HB_TAG('R','I','T',' ')}}, /* Ritarungo */
{"rki", {HB_TAG('A','R','K',' ')}}, /* Rakhine */
{"rkw", {HB_TAG('R','K','W',' ')}}, /* Arakwal */
{"rm", {HB_TAG('R','M','S',' ')}}, /* Romansh */
{"rmy", {HB_TAG('R','M','Y',' ')}}, /* Vlax Romani */
{"rn", {HB_TAG('R','U','N',' ')}}, /* Rundi */
{"ro", {HB_TAG('R','O','M',' ')}}, /* Romanian */
{"rom", {HB_TAG('R','O','Y',' ')}}, /* Romany [macrolanguage] */
{"rtm", {HB_TAG('R','T','M',' ')}}, /* Rotuman */
{"ru", {HB_TAG('R','U','S',' ')}}, /* Russian */
{"rue", {HB_TAG('R','S','Y',' ')}}, /* Rusyn */
{"rup", {HB_TAG('R','U','P',' ')}}, /* Aromanian/Arumanian/Macedo-Romanian */
{"rw", {HB_TAG('R','U','A',' ')}}, /* Kinyarwanda */
{"rwr", {HB_TAG('M','A','W',' ')}}, /* Marwari (India) */
{"sa", {HB_TAG('S','A','N',' ')}}, /* Sanskrit */
{"sah", {HB_TAG('Y','A','K',' ')}}, /* Yakut */
{"sam", {HB_TAG('P','A','A',' ')}}, /* Palestinian Aramaic */
{"sas", {HB_TAG('S','A','S',' ')}}, /* Sasak */
{"sat", {HB_TAG('S','A','T',' ')}}, /* Santali */
{"sc", {HB_TAG('S','R','D',' ')}}, /* Sardinian [macrolanguage] */
{"sck", {HB_TAG('S','A','D',' ')}}, /* Sadri */
{"scn", {HB_TAG('S','C','N',' ')}}, /* Sicilian */
{"sco", {HB_TAG('S','C','O',' ')}}, /* Scots */
{"scs", {HB_TAG('S','L','A',' ')}}, /* [North] Slavey */
{"sd", {HB_TAG('S','N','D',' ')}}, /* Sindhi */
{"se", {HB_TAG('N','S','M',' ')}}, /* Northern Sami */
{"seh", {HB_TAG('S','N','A',' ')}}, /* Sena */
{"sel", {HB_TAG('S','E','L',' ')}}, /* Selkup */
{"sez", {HB_TAG('Q','I','N',' ')}}, /* Senthang Chin */
{"sg", {HB_TAG('S','G','O',' ')}}, /* Sango */
{"sga", {HB_TAG('S','G','A',' ')}}, /* Old Irish (to 900) */
{"sgs", {HB_TAG('S','G','S',' ')}}, /* Samogitian */
{"sgw", {HB_TAG('C','H','G',' ')}}, /* Sebat Bet Gurage */
/*{"sgw", {HB_TAG('S','G','W',' ')}},*/ /* Sebat Bet Gurage (in SIL fonts) */
{"shi", {HB_TAG('S','H','I',' ')}}, /* Tachelhit */
{"shn", {HB_TAG('S','H','N',' ')}}, /* Shan */
{"si", {HB_TAG('S','N','H',' ')}}, /* Sinhala */
{"sid", {HB_TAG('S','I','D',' ')}}, /* Sidamo */
{"sjd", {HB_TAG('K','S','M',' ')}}, /* Kildin Sami */
{"sk", {HB_TAG('S','K','Y',' ')}}, /* Slovak */
{"skr", {HB_TAG('S','R','K',' ')}}, /* Seraiki */
{"sl", {HB_TAG('S','L','V',' ')}}, /* Slovenian */
{"sm", {HB_TAG('S','M','O',' ')}}, /* Samoan */
{"sma", {HB_TAG('S','S','M',' ')}}, /* Southern Sami */
{"smj", {HB_TAG('L','S','M',' ')}}, /* Lule Sami */
{"smn", {HB_TAG('I','S','M',' ')}}, /* Inari Sami */
{"sms", {HB_TAG('S','K','S',' ')}}, /* Skolt Sami */
{"sn", {HB_TAG('S','N','A','0')}}, /* Shona */
{"snk", {HB_TAG('S','N','K',' ')}}, /* Soninke */
{"so", {HB_TAG('S','M','L',' ')}}, /* Somali */
{"sop", {HB_TAG('S','O','P',' ')}}, /* Songe */
{"sq", {HB_TAG('S','Q','I',' ')}}, /* Albanian [macrolanguage] */
{"sr", {HB_TAG('S','R','B',' ')}}, /* Serbian */
{"srr", {HB_TAG('S','R','R',' ')}}, /* Serer */
{"ss", {HB_TAG('S','W','Z',' ')}}, /* Swati */
{"st", {HB_TAG('S','O','T',' ')}}, /* [Southern] Sotho */
{"stq", {HB_TAG('S','T','Q',' ')}}, /* Saterfriesisch */
{"stv", {HB_TAG('S','I','G',' ')}}, /* Silt'e */
{"su", {HB_TAG('S','U','N',' ')}}, /* Sundanese */
{"suk", {HB_TAG('S','U','K',' ')}}, /* Sukuma */
{"suq", {HB_TAG('S','U','R',' ')}}, /* Suri */
{"sv", {HB_TAG('S','V','E',' ')}}, /* Swedish */
{"sva", {HB_TAG('S','V','A',' ')}}, /* Svan */
{"sw", {HB_TAG('S','W','K',' ')}}, /* Swahili [macrolanguage] */
{"swb", {HB_TAG('C','M','R',' ')}}, /* Comorian */
{"swh", {HB_TAG('S','W','K',' ')}}, /* Kiswahili/Swahili */
{"swv", {HB_TAG('M','A','W',' ')}}, /* Shekhawati */
{"sxu", {HB_TAG('S','X','U',' ')}}, /* Upper Saxon */
{"syc", {HB_TAG('S','Y','R',' ')}}, /* Classical Syriac */
{"syl", {HB_TAG('S','Y','L',' ')}}, /* Sylheti */
{"syr", {HB_TAG('S','Y','R',' ')}}, /* Syriac [macrolanguage] */
{"szl", {HB_TAG('S','Z','L',' ')}}, /* Silesian */
{"ta", {HB_TAG('T','A','M',' ')}}, /* Tamil */
{"tab", {HB_TAG('T','A','B',' ')}}, /* Tabasaran */
{"tcp", {HB_TAG('Q','I','N',' ')}}, /* Tawr Chin */
{"tcy", {HB_TAG('T','U','L',' ')}}, /* Tulu */
{"tcz", {HB_TAG('Q','I','N',' ')}}, /* Thado Chin */
{"tdd", {HB_TAG('T','D','D',' ')}}, /* Tai Nüa */
{"te", {HB_TAG('T','E','L',' ')}}, /* Telugu */
{"tem", {HB_TAG('T','M','N',' ')}}, /* Temne */
{"tet", {HB_TAG('T','E','T',' ')}}, /* Tetum */
{"tg", {HB_TAG('T','A','J',' ')}}, /* Tajik */
{"th", {HB_TAG('T','H','A',' ')}}, /* Thai */
{"ti", {HB_TAG('T','G','Y',' ')}}, /* Tigrinya */
{"tig", {HB_TAG('T','G','R',' ')}}, /* Tigre */
{"tiv", {HB_TAG('T','I','V',' ')}}, /* Tiv */
{"tk", {HB_TAG('T','K','M',' ')}}, /* Turkmen */
{"tl", {HB_TAG('T','G','L',' ')}}, /* Tagalog */
{"tmh", {HB_TAG('T','M','H',' ')}}, /* Tamashek */
{"tn", {HB_TAG('T','N','A',' ')}}, /* Tswana */
{"to", {HB_TAG('T','G','N',' ')}}, /* Tonga (Tonga Islands) */
{"tod", {HB_TAG('T','O','D','0')}}, /* Toma */
{"toi", {HB_TAG('T','N','G',' ')}}, /* Tonga */
{"tpi", {HB_TAG('T','P','I',' ')}}, /* Tok Pisin */
{"tr", {HB_TAG('T','R','K',' ')}}, /* Turkish */
{"tru", {HB_TAG('T','U','A',' ')}}, /* Turoyo Aramaic */
{"ts", {HB_TAG('T','S','G',' ')}}, /* Tsonga */
{"tt", {HB_TAG('T','A','T',' ')}}, /* Tatar */
{"tum", {HB_TAG('T','U','M',' ')}}, /* Tumbuka */
{"tvl", {HB_TAG('T','V','L',' ')}}, /* Tuvalu */
{"tw", {HB_TAG('T','W','I',' ')}}, /* Twi */
{"ty", {HB_TAG('T','H','T',' ')}}, /* Tahitian */
{"tyv", {HB_TAG('T','U','V',' ')}}, /* Tuvin */
{"tyz", {HB_TAG('T','Y','Z',' ')}}, /* Tày */
{"tzm", {HB_TAG('T','Z','M',' ')}}, /* Central Atlas Tamazight */
{"tzo", {HB_TAG('T','Z','O',' ')}}, /* Tzotzil */
{"udm", {HB_TAG('U','D','M',' ')}}, /* Udmurt */
{"ug", {HB_TAG('U','Y','G',' ')}}, /* Uighur */
{"uk", {HB_TAG('U','K','R',' ')}}, /* Ukrainian */
{"umb", {HB_TAG('U','M','B',' ')}}, /* Umbundu */
{"unr", {HB_TAG('M','U','N',' ')}}, /* Mundari */
{"ur", {HB_TAG('U','R','D',' ')}}, /* Urdu */
{"uz", {HB_TAG('U','Z','B',' ')}}, /* Uzbek [macrolanguage] */
{"uzn", {HB_TAG('U','Z','B',' ')}}, /* Northern Uzbek */
{"uzs", {HB_TAG('U','Z','B',' ')}}, /* Southern Uzbek */
{"ve", {HB_TAG('V','E','N',' ')}}, /* Venda */
{"vec", {HB_TAG('V','E','C',' ')}}, /* Venetian */
{"vi", {HB_TAG('V','I','T',' ')}}, /* Vietnamese */
{"vls", {HB_TAG('F','L','E',' ')}}, /* Vlaams */
{"vmw", {HB_TAG('M','A','K',' ')}}, /* Makhuwa */
{"vo", {HB_TAG('V','O','L',' ')}}, /* Volapük */
{"vro", {HB_TAG('V','R','O',' ')}}, /* Võro */
{"wa", {HB_TAG('W','L','N',' ')}}, /* Walloon */
{"war", {HB_TAG('W','A','R',' ')}}, /* Waray (Philippines) */
{"wbm", {HB_TAG('W','A',' ',' ')}}, /* Wa */
{"wbr", {HB_TAG('W','A','G',' ')}}, /* Wagdi */
{"wle", {HB_TAG('S','I','G',' ')}}, /* Wolane */
{"wo", {HB_TAG('W','L','F',' ')}}, /* Wolof */
{"wry", {HB_TAG('M','A','W',' ')}}, /* Merwari */
{"wtm", {HB_TAG('W','T','M',' ')}}, /* Mewati */
{"xal", {HB_TAG('K','L','M',' ')}}, /* Kalmyk */
{"xan", {HB_TAG('S','E','K',' ')}}, /* Sekota */
{"xh", {HB_TAG('X','H','S',' ')}}, /* Xhosa */
{"xjb", {HB_TAG('X','J','B',' ')}}, /* Minjangbal */
{"xog", {HB_TAG('X','O','G',' ')}}, /* Soga */
{"xom", {HB_TAG('K','M','O',' ')}}, /* Komo (Sudan) */
{"xpe", {HB_TAG('X','P','E',' ')}}, /* Kpelle (Liberia) */
{"xsl", {HB_TAG('S','S','L',' ')}}, /* South Slavey */
{"xst", {HB_TAG('S','I','G',' ')}}, /* Silt'e (retired code) */
{"xwo", {HB_TAG('T','O','D',' ')}}, /* Written Oirat (Todo) */
{"yao", {HB_TAG('Y','A','O',' ')}}, /* Yao */
{"yap", {HB_TAG('Y','A','P',' ')}}, /* Yapese */
{"yi", {HB_TAG('J','I','I',' ')}}, /* Yiddish [macrolanguage] */
{"yo", {HB_TAG('Y','B','A',' ')}}, /* Yoruba */
{"yos", {HB_TAG('Q','I','N',' ')}}, /* Yos, deprecated by IANA in favor of Zou [zom] */
{"yso", {HB_TAG('N','I','S',' ')}}, /* Nisi (China) */
{"za", {HB_TAG('Z','H','A',' ')}}, /* Chuang/Zhuang [macrolanguage] */
{"zea", {HB_TAG('Z','E','A',' ')}}, /* Zeeuws */
{"zgh", {HB_TAG('Z','G','H',' ')}}, /* Standard Moroccan Tamazight */
{"zne", {HB_TAG('Z','N','D',' ')}}, /* Zande */
{"zom", {HB_TAG('Q','I','N',' ')}}, /* Zou */
{"zu", {HB_TAG('Z','U','L',' ')}}, /* Zulu */
{"zum", {HB_TAG('L','R','C',' ')}}, /* Kumzari */
{"zza", {HB_TAG('Z','Z','A',' ')}}, /* Zazaki */
/* The language IDs corresponding to the following OpenType tags are unclear,
* overlap, or are architecturally weird. Needs more research. */
/*{"chp", {HB_TAG('S','A','Y',' ')}},*/ /* Sayisi */
/*{"cwd", {HB_TAG('T','C','R',' ')}},*/ /* TH-Cree */
/*{"emk", {HB_TAG('E','M','K',' ')}},*/ /* Eastern Maninkakan */
/*{"krc", {HB_TAG('B','A','L',' ')}},*/ /* Balkar */
/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */
/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */
/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */
/*{"hy?", {HB_TAG('H','Y','E','0')}},*/ /* Armenian East (ISO 639-3 hye according to Microsoft, but that's equivalent to ISO 639-1 hy) */
/*{"ga-Latg?", {HB_TAG('I','R','T',' ')}},*/ /* Irish Traditional */
/*{"krc", {HB_TAG('K','A','R',' ')}},*/ /* Karachay */
/*{"ka-Geok?", {HB_TAG('K','G','E',' ')}},*/ /* Khutsuri Georgian */
/*{"kca", {HB_TAG('K','H','K',' ')}},*/ /* Khanty-Kazim */
/*{"kca", {HB_TAG('K','H','S',' ')}},*/ /* Khanty-Shurishkar */
/*{"kca", {HB_TAG('K','H','V',' ')}},*/ /* Khanty-Vakhi */
/*{"kqs, kss", {HB_TAG('K','I','S',' ')}},*/ /* Kisii */
/*{"lua", {HB_TAG('L','U','A',' ')}},*/ /* Luba-Lulua */
/*{"mlq", {HB_TAG('M','L','N',' ')}},*/ /* Malinke */
/*{"nso", {HB_TAG('N','S','O',' ')}},*/ /* Sotho, Northern */
/*{"??", {HB_TAG('M','A','L',' ')}},*/ /* Malayalam Traditional */
/*{"csw", {HB_TAG('N','C','R',' ')}},*/ /* N-Cree */
/*{"csw", {HB_TAG('N','H','C',' ')}},*/ /* Norway House Cree */
/*{"el-polyton", {HB_TAG('P','G','R',' ')}},*/ /* Polytonic Greek */
/*{"bgr, cnh, cnw, czt, sez, tcp, csy, ctd, flm, pck, tcz, zom, cmr, dao, hlt, cka, cnk, mrh, mwg, cbl, cnb, csh", {HB_TAG('Q','I','N',' ')}},*/ /* Chin */
/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */
/*{"zh-Latn-pinyin", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */
};
/*
 * One row of a lookup table keyed by a multi-subtag language string
 * (see ot_languages_zh), pairing that string with a single OpenType
 * language system tag.
 */
typedef struct {
/* NUL-terminated language string; 11 bytes leaves room for 10 characters,
 * which is the length of the longest key used with this type in this file
 * ("zh-hant-hk"). */
char language[11];
/* OpenType language system tag selected when `language` matches. */
hb_tag_t tag;
} LangTagLong;
/*
 * Chinese language tags that need more than the bare "zh" subtag to pick an
 * OpenType language system tag.  The table is scanned front to back with
 * lang_matches() and the first hit wins, so wherever one entry is a prefix
 * of another ("zh-hant" vs. "zh-hant-hk") the longer entry has to come first.
 */
static const LangTagLong ot_languages_zh[] = {
  /* Language + region. */
  {"zh-cn",      HB_TAG('Z','H','S',' ')}, /* Chinese (China) */
  {"zh-hk",      HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
  {"zh-mo",      HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */
  {"zh-sg",      HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */
  {"zh-tw",      HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */

  /* Language + script, optionally followed by a region.  The region-qualified
   * "zh-hant-*" rows precede plain "zh-hant" because it is their prefix. */
  {"zh-hans",    HB_TAG('Z','H','S',' ')}, /* Chinese (Simplified) */
  {"zh-hant-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
  {"zh-hant-mo", HB_TAG('Z','H','H',' ')}, /* Chinese (Macao) */
  {"zh-hant",    HB_TAG('Z','H','T',' ')}, /* Chinese (Traditional) */
};
static int static int
lang_compare_first_component (const void *pa, lang_compare_first_component (const void *pa,
const void *pb) const void *pb)
@ -912,23 +186,16 @@ lang_compare_first_component (const void *pa,
} }
/*
 * NOTE(review): this span is a side-by-side diff rendering; the left column
 * is the old match_subtag(), the right column is the new subtag_matches().
 *
 * Both versions search lang_str for `subtag` with strstr().  They return
 * false when no occurrence is found or when the occurrence starts at or
 * after `limit`.  They return true when the character immediately after the
 * occurrence fails ISALNUM (presumably: is not a letter or digit, so the
 * match ends on a subtag boundary -- confirm against the macro definition).
 * Otherwise the search resumes just past the rejected occurrence.
 *
 * Differences visible here: the old version also rejected an occurrence
 * located at the very start of the remaining search window (s == lang_str)
 * and, on success, stored `tag` in tags[0] and set *count to 1.  The new
 * version takes no output parameters and only reports whether the subtag
 * is present.
 */
static bool static bool
match_subtag (const char *lang_str, subtag_matches (const char *lang_str,
const char *limit, const char *limit,
unsigned int *count, const char *subtag)
hb_tag_t *tags,
const char *subtag,
hb_tag_t tag)
{ {
do { do {
const char *s = strstr (lang_str, subtag); const char *s = strstr (lang_str, subtag);
if (!s || s >= limit || s == lang_str) if (!s || s >= limit)
return false; return false;
if (!ISALNUM (s[strlen (subtag)])) if (!ISALNUM (s[strlen (subtag)]))
{
tags[0] = tag;
*count = 1;
return true; return true;
}
lang_str = s + strlen (subtag); lang_str = s + strlen (subtag);
} while (1); } while (1);
} }
@ -942,6 +209,23 @@ lang_matches (const char *lang_str, const char *spec)
(lang_str[len] == '\0' || lang_str[len] == '-'); (lang_str[len] == '\0' || lang_str[len] == '-');
} }
/*
 * One row of the ot_languages table: a short language code together with
 * the OpenType language system tags it maps to.
 */
typedef struct {
/* NUL-terminated language code; 4 bytes leaves room for at most a
 * three-letter code. */
char language[4];
/* OpenType tags for this language, up to HB_OT_MAX_TAGS_PER_LANGUAGE of
 * them.  hb_ot_tag_to_language() compares only tags[0] when mapping a tag
 * back to a language. */
hb_tag_t tags[HB_OT_MAX_TAGS_PER_LANGUAGE];
} LangTag;
#include "hb-ot-tag-table.hh"
/* The language IDs corresponding to the following OpenType tags are unclear,
* overlap, or are architecturally weird. Needs more research. */
/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */
/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */
/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */
/*{"??", {HB_TAG('N','G','R',' ')}},*/ /* Nagari */
/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */
/*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */
hb_tag_t hb_tag_t
hb_ot_tag_from_language (hb_language_t language) hb_ot_tag_from_language (hb_language_t language)
{ {
@ -960,36 +244,11 @@ hb_ot_tags_from_language (const char *lang_str,
{ {
const char *s; const char *s;
if (0 /* Check for matches of multiple subtags. */
/* if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
* "fonipa" is a variant tag in BCP-47, meaning the International Phonetic Alphabet. return;
* It can be applied to any language.
*/
|| match_subtag (lang_str, limit, count, tags, "-fonipa", HB_TAG('I','P','P','H'))
/*
* "fonnapa" is a variant tag in BCP-47, meaning the North American Phonetic Alphabet
* also known as Americanist Phonetic Notation. It can be applied to any language.
*/
|| match_subtag (lang_str, limit, count, tags, "-fonnapa", HB_TAG('A','P','P','H'))
/*
* "Syre" is a BCP-47 script tag, meaning the Estrangela variant of the Syriac script.
* It can be applied to any language.
*/
|| match_subtag (lang_str, limit, count, tags, "-syre", HB_TAG('S','Y','R','E'))
/*
* "Syrj" is a BCP-47 script tag, meaning the Western variant of the Syriac script.
* It can be applied to any language.
*/
|| match_subtag (lang_str, limit, count, tags, "-syrj", HB_TAG('S','Y','R','J'))
/*
* "Syrn" is a BCP-47 script tag, meaning the Eastern variant of the Syriac script.
* It can be applied to any language.
*/
|| match_subtag (lang_str, limit, count, tags, "-syrn", HB_TAG('S','Y','R','N'))
)
return;
/* Find a language matching in the first component */ /* Find a language matching in the first component. */
{ {
const LangTag *lang_tag; const LangTag *lang_tag;
lang_tag = (LangTag *) bsearch (lang_str, ot_languages, lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
@ -1005,29 +264,6 @@ hb_ot_tags_from_language (const char *lang_str,
} }
} }
/* Otherwise, check the Chinese ones */
if (0 == lang_compare_first_component (lang_str, "zh"))
{
unsigned int i;
for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++)
{
const LangTagLong *lang_tag;
lang_tag = &ot_languages_zh[i];
if (lang_matches (lang_str, lang_tag->language))
{
tags[0] = lang_tag->tag;
*count = 1;
return;
}
}
/* Otherwise just return 'ZHS ' */
tags[0] = HB_TAG('Z','H','S',' ');
*count = 1;
return;
}
s = strchr (lang_str, '-'); s = strchr (lang_str, '-');
if (!s) if (!s)
s = lang_str + strlen (lang_str); s = lang_str + strlen (lang_str);
@ -1160,36 +396,16 @@ hb_ot_tag_to_language (hb_tag_t tag)
if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
return nullptr; return nullptr;
/* struct LangTag has only room for 3-letter language tags. */ {
switch (tag) { hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */ if (disambiguated_tag != HB_LANGUAGE_INVALID)
return hb_language_from_string ("und-fonnapa", -1); return disambiguated_tag;
case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */
return hb_language_from_string ("und-fonipa", -1);
case HB_TAG('S','Y','R',' '): /* Syriac [macrolanguage] */
return hb_language_from_string ("syr", -1);
case HB_TAG('S','Y','R','E'): /* Estrangela Syriac */
return hb_language_from_string ("und-Syre", -1);
case HB_TAG('S','Y','R','J'): /* Western Syriac */
return hb_language_from_string ("und-Syrj", -1);
case HB_TAG('S','Y','R','N'): /* Eastern Syriac */
return hb_language_from_string ("und-Syrn", -1);
} }
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
if (ot_languages[i].tags[0] == tag) if (ot_languages[i].tags[0] == tag)
return hb_language_from_string (ot_languages[i].language, -1); return hb_language_from_string (ot_languages[i].language, -1);
/* If tag starts with ZH, it's Chinese */
if ((tag & 0xFFFF0000u) == 0x5A480000u) {
switch (tag) {
case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk", -1); /* Hong Kong */
case HB_TAG('Z','H','S',' '): return hb_language_from_string ("zh-Hans", -1); /* Simplified */
case HB_TAG('Z','H','T',' '): return hb_language_from_string ("zh-Hant", -1); /* Traditional */
default: break; /* Fall through */
}
}
/* Else return a custom language in the form of "x-hbotABCD" */ /* Else return a custom language in the form of "x-hbotABCD" */
{ {
unsigned char buf[11] = "x-hbot"; unsigned char buf[11] = "x-hbot";

View File

@ -40,7 +40,7 @@ HB_BEGIN_DECLS
#define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't') #define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't')
#define HB_OT_MAX_TAGS_PER_SCRIPT 2u #define HB_OT_MAX_TAGS_PER_SCRIPT 2u
#define HB_OT_MAX_TAGS_PER_LANGUAGE 1u #define HB_OT_MAX_TAGS_PER_LANGUAGE 3u
HB_EXTERN void HB_EXTERN void
hb_ot_tags (hb_script_t script, hb_ot_tags (hb_script_t script,

View File

@ -269,27 +269,27 @@ test_ot_tag_language (void)
test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */ test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */
test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
test_tag_from_language ("ZHS", "zh"); /* Chinese */ test_tag_from_language ("ZHS", "zh"); /* Chinese */
test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */ test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */
test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */ test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */
test_tag_from_language ("ZHH", "zh-mo"); /* Chinese (Macao) */ test_tag_from_language ("ZHH", "zh-mo"); /* Chinese (Macao) */
test_tag_from_language ("ZHH", "zh-hant-mo"); /* Chinese (Macao) */ test_tag_from_language ("ZHH", "zh-hant-mo"); /* Chinese (Macao) */
test_tag_from_language ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */ test_language_two_way ("ZHH", "zh-HK"); /* Chinese (Hong Kong) */
test_tag_from_language ("ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */ test_tag_from_language ("ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */
test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */ test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */
test_tag_from_language ("ZHS", "zh-Hans"); /* Chinese (Simplified) */ test_language_two_way ("ZHS", "zh-Hans"); /* Chinese (Simplified) */
test_tag_from_language ("ZHT", "zh-Hant"); /* Chinese (Traditional) */ test_language_two_way ("ZHT", "zh-Hant"); /* Chinese (Traditional) */
test_tag_from_language ("ZHS", "zh-xx"); /* Chinese (Other) */ test_tag_from_language ("ZHS", "zh-xx"); /* Chinese (Other) */
test_tag_from_language ("ZHS", "zh-Hans-TW");
test_tag_from_language ("ZHH", "yue");
test_tag_from_language ("ZHH", "yue-Hant");
test_tag_from_language ("ZHS", "yue-Hans");
test_tag_from_language ("ZHS", "zh"); /* Chinese */ test_tag_from_language ("ZHS", "zh"); /* Chinese */
test_tag_from_language ("ZHS", "zh-xx"); test_tag_from_language ("ZHS", "zh-xx");
test_tag_to_language ("ZHS", "zh-Hans");
test_tag_to_language ("ZHT", "zh-Hant");
test_tag_to_language ("ZHP", "x-hbotzhp");
test_language_two_way ("ABC", "x-hbotabc"); test_language_two_way ("ABC", "x-hbotabc");
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc");
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc");
@ -305,7 +305,7 @@ test_ot_tag_language (void)
* Invalid input. The precise answer does not matter, as long as it * Invalid input. The precise answer does not matter, as long as it
* does not crash or get into an infinite loop. * does not crash or get into an infinite loop.
*/ */
test_tag_from_language ("dflt", "-fonipa"); test_tag_from_language ("IPPH", "-fonipa");
/* /*
* Tags that contain "-fonipa" as a substring but which do not contain * Tags that contain "-fonipa" as a substring but which do not contain
@ -320,36 +320,45 @@ test_ot_tag_language (void)
test_tag_from_language ("IPPH", "en-fonipa"); test_tag_from_language ("IPPH", "en-fonipa");
test_tag_from_language ("IPPH", "en-fonipax-fonipa"); test_tag_from_language ("IPPH", "en-fonipax-fonipa");
test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar"); test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar");
test_tag_from_language ("IPPH", "und-fonipa"); test_language_two_way ("IPPH", "und-fonipa");
test_tag_from_language ("IPPH", "zh-fonipa"); test_tag_from_language ("IPPH", "zh-fonipa");
test_tag_to_language ("IPPH", "und-fonipa");
/* North American Phonetic Alphabet (Americanist Phonetic Notation) */ /* North American Phonetic Alphabet (Americanist Phonetic Notation) */
test_tag_from_language ("APPH", "en-fonnapa"); test_tag_from_language ("APPH", "en-fonnapa");
test_tag_from_language ("APPH", "chr-fonnapa"); test_tag_from_language ("APPH", "chr-fonnapa");
test_tag_from_language ("APPH", "und-fonnapa"); test_language_two_way ("APPH", "und-fonnapa");
test_tag_to_language ("APPH", "und-fonnapa");
/* Khutsuri Georgian */
test_tag_from_language ("KGE", "ka-Geok");
test_language_two_way ("KGE", "und-Geok");
/* Irish Traditional */
test_language_two_way ("IRT", "ga-Latg");
/* Moldavian */
test_language_two_way ("MOL", "ro-MD");
/* Polytonic Greek */
test_language_two_way ("PGR", "el-polyton");
test_tag_from_language ("PGR", "el-CY-polyton");
/* Estrangela Syriac */ /* Estrangela Syriac */
test_tag_from_language ("SYRE", "aii-Syre"); test_tag_from_language ("SYRE", "aii-Syre");
test_tag_from_language ("SYRE", "de-Syre"); test_tag_from_language ("SYRE", "de-Syre");
test_tag_from_language ("SYRE", "syr-Syre"); test_tag_from_language ("SYRE", "syr-Syre");
test_tag_from_language ("SYRE", "und-Syre"); test_language_two_way ("SYRE", "und-Syre");
test_tag_to_language ("SYRE", "und-Syre");
/* Western Syriac */ /* Western Syriac */
test_tag_from_language ("SYRJ", "aii-Syrj"); test_tag_from_language ("SYRJ", "aii-Syrj");
test_tag_from_language ("SYRJ", "de-Syrj"); test_tag_from_language ("SYRJ", "de-Syrj");
test_tag_from_language ("SYRJ", "syr-Syrj"); test_tag_from_language ("SYRJ", "syr-Syrj");
test_tag_from_language ("SYRJ", "und-Syrj"); test_language_two_way ("SYRJ", "und-Syrj");
test_tag_to_language ("SYRJ", "und-Syrj");
/* Eastern Syriac */ /* Eastern Syriac */
test_tag_from_language ("SYRN", "aii-Syrn"); test_tag_from_language ("SYRN", "aii-Syrn");
test_tag_from_language ("SYRN", "de-Syrn"); test_tag_from_language ("SYRN", "de-Syrn");
test_tag_from_language ("SYRN", "syr-Syrn"); test_tag_from_language ("SYRN", "syr-Syrn");
test_tag_from_language ("SYRN", "und-Syrn"); test_language_two_way ("SYRN", "und-Syrn");
test_tag_to_language ("SYRN", "und-Syrn");
/* Test that x-hbot overrides the base language */ /* Test that x-hbot overrides the base language */
test_tag_from_language ("ABC", "fa-x-hbotabc-zxc"); test_tag_from_language ("ABC", "fa-x-hbotabc-zxc");
@ -358,6 +367,18 @@ test_ot_tag_language (void)
test_tag_from_language ("ABC", "zh-cn-x-hbotabc-zxc"); test_tag_from_language ("ABC", "zh-cn-x-hbotabc-zxc");
test_tag_from_language ("ABC", "zh-xy-x-hbotabc-zxc"); test_tag_from_language ("ABC", "zh-xy-x-hbotabc-zxc");
test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc"); test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
/* Unnormalized BCP 47 tags */
test_tag_from_language ("JBO", "art-lojban");
test_tag_from_language ("LTZ", "i-lux");
test_tag_from_language ("MNG", "drh");
test_tag_from_language ("NOR", "no-bok");
test_tag_from_language ("NYN", "no-nyn");
test_tag_from_language ("ZHS", "i-hak");
test_tag_from_language ("ZHS", "zh-guoyu");
test_tag_from_language ("ZHS", "zh-min");
test_tag_from_language ("ZHS", "zh-min-nan");
test_tag_from_language ("ZHS", "zh-xiang");
} }
static void static void
@ -401,10 +422,11 @@ test_ot_tag_full (void)
test_tags (HB_SCRIPT_INVALID, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "ENG"); test_tags (HB_SCRIPT_INVALID, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "ENG");
test_tags (HB_SCRIPT_LATIN, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "latn", "ENG"); test_tags (HB_SCRIPT_LATIN, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "latn", "ENG");
test_tags (HB_SCRIPT_LATIN, "en", 0, 0, 0, 0); test_tags (HB_SCRIPT_LATIN, "en", 0, 0, 0, 0);
test_tags (HB_SCRIPT_INVALID, "und-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH");
test_tags (HB_SCRIPT_INVALID, "en-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH"); test_tags (HB_SCRIPT_INVALID, "en-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH");
test_tags (HB_SCRIPT_INVALID, "x-hbot1234-hbsc5678", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "5678", "1234"); test_tags (HB_SCRIPT_INVALID, "x-hbot1234-hbsc5678", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "5678", "1234");
test_tags (HB_SCRIPT_MALAYALAM, "ml", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 2, 1, "mlm2", "mlym", "MLR"); test_tags (HB_SCRIPT_MALAYALAM, "ml", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 2, 2, "mlm2", "mlym", "MAL", "MLR");
test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MLR"); test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MAL");
test_tags (HB_SCRIPT_INVALID, "xyz", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "XYZ"); test_tags (HB_SCRIPT_INVALID, "xyz", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "XYZ");
test_tags (HB_SCRIPT_INVALID, "xy", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 0); test_tags (HB_SCRIPT_INVALID, "xy", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 0);
} }