2011-05-14 04:46:36 +02:00
|
|
|
/*
|
|
|
|
* Copyright © 2011 Google, Inc.
|
|
|
|
*
|
|
|
|
* This is part of HarfBuzz, a text shaping library.
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, without written agreement and without
|
|
|
|
* license or royalty fees, to use, copy, modify, and distribute this
|
|
|
|
* software and its documentation for any purpose, provided that the
|
|
|
|
* above copyright notice and the following two paragraphs appear in
|
|
|
|
* all copies of this software.
|
|
|
|
*
|
|
|
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
|
|
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
|
|
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
|
|
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
|
|
|
* DAMAGE.
|
|
|
|
*
|
|
|
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
|
|
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
|
|
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
|
|
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
|
|
|
*
|
|
|
|
* Google Author(s): Behdad Esfahbod
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "hb-test.h"
|
|
|
|
|
|
|
|
#include <hb-ot.h>
|
|
|
|
|
|
|
|
/* Unit tests for hb-ot-tag.h */
|
|
|
|
|
|
|
|
|
2018-04-12 11:10:45 +02:00
|
|
|
/* https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags */
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
static void
|
|
|
|
test_simple_tags (const char *s, hb_script_t script)
|
|
|
|
{
|
|
|
|
hb_script_t tag;
|
2018-02-10 22:52:35 +01:00
|
|
|
hb_tag_t t1, t2;
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
g_test_message ("Testing script %c%c%c%c: tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s);
|
2011-08-26 09:18:53 +02:00
|
|
|
tag = hb_tag_from_string (s, -1);
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
hb_ot_tags_from_script (script, &t1, &t2);
|
|
|
|
|
|
|
|
g_assert_cmphex (t1, ==, tag);
|
|
|
|
g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);
|
|
|
|
|
|
|
|
g_assert_cmphex (hb_ot_tag_to_script (tag), ==, script);
|
|
|
|
}
|
|
|
|
|
2017-12-08 17:21:14 +01:00
|
|
|
static void
|
|
|
|
test_script_tags_from_language (const char *s, const char *lang_s, hb_script_t script)
|
|
|
|
{
|
|
|
|
hb_script_t tag;
|
|
|
|
unsigned int count = 1;
|
|
|
|
hb_script_t t;
|
|
|
|
|
|
|
|
g_test_message ("Testing script %c%c%c%c: script tag %s, language tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s, lang_s);
|
|
|
|
tag = hb_tag_from_string (s, -1);
|
|
|
|
|
2018-07-24 03:19:23 +02:00
|
|
|
hb_ot_tags_from_script_and_language (script, hb_language_from_string (lang_s, -1), &count, &t, NULL, NULL);
|
2017-12-08 17:21:14 +01:00
|
|
|
|
|
|
|
if (count != 0)
|
|
|
|
{
|
|
|
|
g_assert_cmpuint (count, ==, 1);
|
|
|
|
g_assert_cmphex (t, ==, tag);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
static void
|
|
|
|
test_indic_tags (const char *s1, const char *s2, hb_script_t script)
|
|
|
|
{
|
|
|
|
hb_script_t tag1, tag2;
|
|
|
|
hb_script_t t1, t2;
|
|
|
|
|
|
|
|
g_test_message ("Testing script %c%c%c%c: new tag %s, old tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s1, s2);
|
2011-08-26 09:18:53 +02:00
|
|
|
tag1 = hb_tag_from_string (s1, -1);
|
|
|
|
tag2 = hb_tag_from_string (s2, -1);
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
hb_ot_tags_from_script (script, &t1, &t2);
|
|
|
|
|
|
|
|
g_assert_cmphex (t1, ==, tag1);
|
|
|
|
g_assert_cmphex (t2, ==, tag2);
|
|
|
|
|
|
|
|
g_assert_cmphex (hb_ot_tag_to_script (tag1), ==, script);
|
|
|
|
g_assert_cmphex (hb_ot_tag_to_script (tag2), ==, script);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
test_ot_tag_script_degenerate (void)
|
|
|
|
{
|
2018-02-07 19:58:23 +01:00
|
|
|
hb_tag_t t1, t2;
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
g_assert_cmphex (HB_TAG_CHAR4 ("DFLT"), ==, HB_OT_TAG_DEFAULT_SCRIPT);
|
|
|
|
|
|
|
|
/* HIRAGANA and KATAKANA both map to 'kana' */
|
|
|
|
test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
|
|
|
|
hb_ot_tags_from_script (HB_SCRIPT_HIRAGANA, &t1, &t2);
|
|
|
|
g_assert_cmphex (t1, ==, HB_TAG_CHAR4 ("kana"));
|
|
|
|
g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);
|
|
|
|
|
|
|
|
test_simple_tags ("DFLT", HB_SCRIPT_INVALID);
|
|
|
|
|
|
|
|
/* Spaces are replaced */
|
2011-08-26 09:18:53 +02:00
|
|
|
g_assert_cmphex (hb_ot_tag_to_script (HB_TAG_CHAR4 ("be ")), ==, hb_script_from_string ("Beee", -1));
|
2011-05-14 04:46:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
test_ot_tag_script_simple (void)
|
|
|
|
{
|
|
|
|
/* Arbitrary non-existent script */
|
2011-08-26 09:18:53 +02:00
|
|
|
test_simple_tags ("wwyz", hb_script_from_string ("wWyZ", -1));
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
/* These we don't really care about */
|
|
|
|
test_simple_tags ("zyyy", HB_SCRIPT_COMMON);
|
|
|
|
test_simple_tags ("zinh", HB_SCRIPT_INHERITED);
|
|
|
|
test_simple_tags ("zzzz", HB_SCRIPT_UNKNOWN);
|
|
|
|
|
|
|
|
test_simple_tags ("arab", HB_SCRIPT_ARABIC);
|
|
|
|
test_simple_tags ("copt", HB_SCRIPT_COPTIC);
|
|
|
|
test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
|
|
|
|
test_simple_tags ("latn", HB_SCRIPT_LATIN);
|
|
|
|
|
|
|
|
/* These are trickier since their OT script tags have space. */
|
|
|
|
test_simple_tags ("lao ", HB_SCRIPT_LAO);
|
|
|
|
test_simple_tags ("yi ", HB_SCRIPT_YI);
|
|
|
|
/* Unicode-5.0 additions */
|
|
|
|
test_simple_tags ("nko ", HB_SCRIPT_NKO);
|
|
|
|
/* Unicode-5.1 additions */
|
|
|
|
test_simple_tags ("vai ", HB_SCRIPT_VAI);
|
|
|
|
|
2018-04-12 11:10:45 +02:00
|
|
|
/* https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags */
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
/* Unicode-5.2 additions */
|
|
|
|
test_simple_tags ("mtei", HB_SCRIPT_MEETEI_MAYEK);
|
|
|
|
/* Unicode-6.0 additions */
|
|
|
|
test_simple_tags ("mand", HB_SCRIPT_MANDAIC);
|
|
|
|
}
|
|
|
|
|
2017-12-08 17:21:14 +01:00
|
|
|
static void
|
|
|
|
test_ot_tag_script_from_language (void)
|
|
|
|
{
|
|
|
|
test_script_tags_from_language (NULL, NULL, HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language (NULL, "en", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("copt", "en", HB_SCRIPT_COPTIC);
|
|
|
|
test_script_tags_from_language (NULL, "x-hbsc", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("copt", "x-hbsc", HB_SCRIPT_COPTIC);
|
|
|
|
test_script_tags_from_language ("abc ", "x-hbscabc", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("deva", "x-hbscdeva", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("dev2", "x-hbscdev2", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("copt", "x-hbotpap0-hbsccopt", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language (NULL, "en-x-hbsc", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("copt", "en-x-hbsc", HB_SCRIPT_COPTIC);
|
|
|
|
test_script_tags_from_language ("abc ", "en-x-hbscabc", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("deva", "en-x-hbscdeva", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("dev2", "en-x-hbscdev2", HB_SCRIPT_INVALID);
|
|
|
|
test_script_tags_from_language ("copt", "en-x-hbotpap0-hbsccopt", HB_SCRIPT_INVALID);
|
|
|
|
}
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
static void
|
|
|
|
test_ot_tag_script_indic (void)
|
|
|
|
{
|
|
|
|
test_indic_tags ("bng2", "beng", HB_SCRIPT_BENGALI);
|
|
|
|
test_indic_tags ("dev2", "deva", HB_SCRIPT_DEVANAGARI);
|
|
|
|
test_indic_tags ("gjr2", "gujr", HB_SCRIPT_GUJARATI);
|
|
|
|
test_indic_tags ("gur2", "guru", HB_SCRIPT_GURMUKHI);
|
|
|
|
test_indic_tags ("knd2", "knda", HB_SCRIPT_KANNADA);
|
|
|
|
test_indic_tags ("mlm2", "mlym", HB_SCRIPT_MALAYALAM);
|
|
|
|
test_indic_tags ("ory2", "orya", HB_SCRIPT_ORIYA);
|
|
|
|
test_indic_tags ("tml2", "taml", HB_SCRIPT_TAMIL);
|
|
|
|
test_indic_tags ("tel2", "telu", HB_SCRIPT_TELUGU);
|
2012-11-13 02:27:51 +01:00
|
|
|
test_indic_tags ("mym2", "mymr", HB_SCRIPT_MYANMAR);
|
2011-05-14 04:46:36 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2018-04-12 11:10:45 +02:00
|
|
|
/* https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags */
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
static void
|
|
|
|
test_language_two_way (const char *tag_s, const char *lang_s)
|
|
|
|
{
|
2011-08-26 09:18:53 +02:00
|
|
|
hb_language_t lang = hb_language_from_string (lang_s, -1);
|
|
|
|
hb_tag_t tag = hb_tag_from_string (tag_s, -1);
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
g_test_message ("Testing language %s <-> tag %s", lang_s, tag_s);
|
|
|
|
|
|
|
|
g_assert_cmphex (tag, ==, hb_ot_tag_from_language (lang));
|
|
|
|
g_assert (lang == hb_ot_tag_to_language (tag));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
test_tag_from_language (const char *tag_s, const char *lang_s)
|
|
|
|
{
|
2011-08-26 09:18:53 +02:00
|
|
|
hb_language_t lang = hb_language_from_string (lang_s, -1);
|
|
|
|
hb_tag_t tag = hb_tag_from_string (tag_s, -1);
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
g_test_message ("Testing language %s -> tag %s", lang_s, tag_s);
|
|
|
|
|
|
|
|
g_assert_cmphex (tag, ==, hb_ot_tag_from_language (lang));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
test_tag_to_language (const char *tag_s, const char *lang_s)
|
|
|
|
{
|
2011-08-26 09:18:53 +02:00
|
|
|
hb_language_t lang = hb_language_from_string (lang_s, -1);
|
|
|
|
hb_tag_t tag = hb_tag_from_string (tag_s, -1);
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
g_test_message ("Testing tag %s -> language %s", tag_s, lang_s);
|
|
|
|
|
|
|
|
g_assert (lang == hb_ot_tag_to_language (tag));
|
|
|
|
}
|
|
|
|
|
2018-07-24 03:19:23 +02:00
|
|
|
static void
|
|
|
|
test_tags_to_script_and_language (const char *script_tag_s,
|
|
|
|
const char *lang_tag_s,
|
|
|
|
const char *script_s,
|
|
|
|
const char *lang_s)
|
|
|
|
{
|
|
|
|
hb_script_t actual_script[1];
|
|
|
|
hb_language_t actual_lang[1];
|
|
|
|
hb_tag_t script_tag = hb_tag_from_string (script_tag_s, -1);
|
|
|
|
hb_tag_t lang_tag = hb_tag_from_string (lang_tag_s, -1);
|
|
|
|
hb_ot_tags_to_script_and_language (script_tag, lang_tag, actual_script, actual_lang);
|
|
|
|
g_assert_cmphex (*actual_script, ==, hb_tag_from_string (script_s, -1));
|
|
|
|
g_assert_cmpstr (hb_language_to_string (*actual_lang), ==, lang_s);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Runs test_tags_to_script_and_language() over a fixed list of script
 * tag / language tag pairs and their expected script and language.
 */
static void
test_ot_tags_to_script_and_language (void)
{
  /* Script tag, language tag, expected script and expected language,
   * visited in order. */
  static const struct {
    const char *script_tag;
    const char *lang_tag;
    const char *script;
    const char *language;
  } cases[] = {
    {"DFLT", "ENG", "", "en-x-hbscdflt"},
    {"latn", "ENG", "Latn", "en"},
    {"deva", "MAR", "Deva", "mr-x-hbscdeva"},
    {"dev2", "MAR", "Deva", "mr"},
    {"qaa", "QTZ0", "Qaaa", "x-hbotqtz0-hbscqaa"},
  };
  unsigned int i;

  for (i = 0; i < sizeof (cases) / sizeof (cases[0]); i++)
    test_tags_to_script_and_language (cases[i].script_tag,
                                      cases[i].lang_tag,
                                      cases[i].script,
                                      cases[i].language);
}
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
static void
|
|
|
|
test_ot_tag_language (void)
|
|
|
|
{
|
|
|
|
g_assert_cmphex (HB_TAG_CHAR4 ("dflt"), ==, HB_OT_TAG_DEFAULT_LANGUAGE);
|
|
|
|
test_language_two_way ("dflt", NULL);
|
|
|
|
|
|
|
|
test_language_two_way ("ARA", "ar");
|
|
|
|
|
|
|
|
test_language_two_way ("AZE", "az");
|
|
|
|
test_tag_from_language ("AZE", "az-ir");
|
|
|
|
test_tag_from_language ("AZE", "az-az");
|
|
|
|
|
|
|
|
test_language_two_way ("ENG", "en");
|
|
|
|
test_tag_from_language ("ENG", "en_US");
|
|
|
|
|
2017-01-18 13:51:02 +01:00
|
|
|
test_language_two_way ("CJA", "cja"); /* Western Cham */
|
|
|
|
test_language_two_way ("CJM", "cjm"); /* Eastern Cham */
|
2011-05-14 04:46:36 +02:00
|
|
|
test_language_two_way ("EVN", "eve");
|
|
|
|
|
2017-01-18 13:51:02 +01:00
|
|
|
test_language_two_way ("HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
|
|
|
|
test_tag_from_language ("HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
|
|
|
|
|
|
|
|
test_tag_from_language ("QIN", "bgr"); /* Bawm Chin */
|
|
|
|
test_tag_from_language ("QIN", "cbl"); /* Bualkhaw Chin */
|
|
|
|
test_tag_from_language ("QIN", "cka"); /* Khumi Awa Chin */
|
|
|
|
test_tag_from_language ("QIN", "cmr"); /* Mro-Khimi Chin */
|
|
|
|
test_tag_from_language ("QIN", "cnb"); /* Chinbon Chin */
|
|
|
|
test_tag_from_language ("QIN", "cnh"); /* Hakha Chin */
|
|
|
|
test_tag_from_language ("QIN", "cnk"); /* Khumi Chin */
|
|
|
|
test_tag_from_language ("QIN", "cnw"); /* Ngawn Chin */
|
|
|
|
test_tag_from_language ("QIN", "csh"); /* Asho Chin */
|
|
|
|
test_tag_from_language ("QIN", "csy"); /* Siyin Chin */
|
|
|
|
test_tag_from_language ("QIN", "ctd"); /* Tedim Chin */
|
|
|
|
test_tag_from_language ("QIN", "czt"); /* Zotung Chin */
|
|
|
|
test_tag_from_language ("QIN", "dao"); /* Daai Chin */
|
|
|
|
test_tag_from_language ("QIN", "hlt"); /* Matu Chin */
|
|
|
|
test_tag_from_language ("QIN", "mrh"); /* Mara Chin */
|
|
|
|
test_tag_from_language ("QIN", "pck"); /* Paite Chin */
|
|
|
|
test_tag_from_language ("QIN", "sez"); /* Senthang Chin */
|
|
|
|
test_tag_from_language ("QIN", "tcp"); /* Tawr Chin */
|
|
|
|
test_tag_from_language ("QIN", "tcz"); /* Thado Chin */
|
|
|
|
test_tag_from_language ("QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
|
|
|
|
test_tag_from_language ("QIN", "zom"); /* Zou */
|
|
|
|
test_tag_to_language ("QIN", "bgr"); /* no single BCP47 tag for Chin; picking Bawm Chin */
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
test_language_two_way ("FAR", "fa");
|
|
|
|
test_tag_from_language ("FAR", "fa_IR");
|
|
|
|
|
2017-01-18 13:51:02 +01:00
|
|
|
test_language_two_way ("SWA", "aii"); /* Swadaya Aramaic */
|
|
|
|
|
|
|
|
test_language_two_way ("SYR", "syr"); /* Syriac [macrolanguage] */
|
|
|
|
test_tag_from_language ("SYR", "amw"); /* Western Neo-Aramaic */
|
|
|
|
test_tag_from_language ("SYR", "cld"); /* Chaldean Neo-Aramaic */
|
|
|
|
test_tag_from_language ("SYR", "syc"); /* Classical Syriac */
|
|
|
|
|
|
|
|
test_language_two_way ("TUA", "tru"); /* Turoyo Aramaic */
|
|
|
|
|
2014-07-11 01:20:35 +02:00
|
|
|
test_tag_from_language ("ZHS", "zh"); /* Chinese */
|
2011-05-14 04:46:36 +02:00
|
|
|
test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */
|
|
|
|
test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */
|
2016-08-09 22:03:14 +02:00
|
|
|
test_tag_from_language ("ZHH", "zh-mo"); /* Chinese (Macao) */
|
|
|
|
test_tag_from_language ("ZHH", "zh-hant-mo"); /* Chinese (Macao) */
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("ZHH", "zh-HK"); /* Chinese (Hong Kong) */
|
2016-08-09 22:03:14 +02:00
|
|
|
test_tag_from_language ("ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */
|
2011-05-14 04:46:36 +02:00
|
|
|
test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("ZHS", "zh-Hans"); /* Chinese (Simplified) */
|
|
|
|
test_language_two_way ("ZHT", "zh-Hant"); /* Chinese (Traditional) */
|
2014-07-11 01:06:45 +02:00
|
|
|
test_tag_from_language ("ZHS", "zh-xx"); /* Chinese (Other) */
|
2011-05-14 04:46:36 +02:00
|
|
|
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tag_from_language ("ZHS", "zh-Hans-TW");
|
|
|
|
|
|
|
|
test_tag_from_language ("ZHH", "yue");
|
|
|
|
test_tag_from_language ("ZHH", "yue-Hant");
|
|
|
|
test_tag_from_language ("ZHS", "yue-Hans");
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
test_tag_from_language ("ZHS", "zh"); /* Chinese */
|
|
|
|
test_tag_from_language ("ZHS", "zh-xx");
|
|
|
|
|
|
|
|
test_language_two_way ("ABC", "x-hbotabc");
|
|
|
|
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc");
|
2011-05-14 05:04:46 +02:00
|
|
|
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc");
|
|
|
|
test_tag_from_language ("ABCD", "asdf-asdf-wer-x-hbotabcd");
|
2011-05-14 04:46:36 +02:00
|
|
|
|
|
|
|
test_tag_from_language ("dflt", "asdf-asdf-wer-x-hbot-zxc");
|
2011-05-14 05:04:46 +02:00
|
|
|
|
2011-09-02 19:31:19 +02:00
|
|
|
test_tag_from_language ("dflt", "xy");
|
|
|
|
test_tag_from_language ("XYZ", "xyz"); /* Unknown ISO 639-3 */
|
|
|
|
test_tag_from_language ("XYZ", "xyz-qw"); /* Unknown ISO 639-3 */
|
|
|
|
|
2017-12-08 17:21:14 +01:00
|
|
|
/*
|
|
|
|
* Invalid input. The precise answer does not matter, as long as it
|
|
|
|
* does not crash or get into an infinite loop.
|
|
|
|
*/
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tag_from_language ("IPPH", "-fonipa");
|
2017-12-08 17:21:14 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Tags that contain "-fonipa" as a substring but which do not contain
|
|
|
|
* the subtag "fonipa".
|
|
|
|
*/
|
|
|
|
test_tag_from_language ("ENG", "en-fonipax");
|
|
|
|
test_tag_from_language ("ENG", "en-x-fonipa");
|
|
|
|
test_tag_from_language ("ENG", "en-a-fonipa");
|
|
|
|
test_tag_from_language ("ENG", "en-a-qwe-b-fonipa");
|
|
|
|
|
2015-06-28 03:29:47 +02:00
|
|
|
/* International Phonetic Alphabet */
|
|
|
|
test_tag_from_language ("IPPH", "en-fonipa");
|
2017-12-08 17:21:14 +01:00
|
|
|
test_tag_from_language ("IPPH", "en-fonipax-fonipa");
|
2015-06-28 03:29:47 +02:00
|
|
|
test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("IPPH", "und-fonipa");
|
2015-06-28 03:29:47 +02:00
|
|
|
test_tag_from_language ("IPPH", "zh-fonipa");
|
|
|
|
|
2016-08-18 12:48:38 +02:00
|
|
|
/* North American Phonetic Alphabet (Americanist Phonetic Notation) */
|
|
|
|
test_tag_from_language ("APPH", "en-fonnapa");
|
|
|
|
test_tag_from_language ("APPH", "chr-fonnapa");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("APPH", "und-fonnapa");
|
|
|
|
|
|
|
|
/* Khutsuri Georgian */
|
|
|
|
test_tag_from_language ("KGE", "ka-Geok");
|
|
|
|
test_language_two_way ("KGE", "und-Geok");
|
|
|
|
|
|
|
|
/* Irish Traditional */
|
|
|
|
test_language_two_way ("IRT", "ga-Latg");
|
|
|
|
|
|
|
|
/* Moldavian */
|
|
|
|
test_language_two_way ("MOL", "ro-MD");
|
|
|
|
|
|
|
|
/* Polytonic Greek */
|
|
|
|
test_language_two_way ("PGR", "el-polyton");
|
|
|
|
test_tag_from_language ("PGR", "el-CY-polyton");
|
2016-08-18 12:48:38 +02:00
|
|
|
|
2017-01-18 13:51:02 +01:00
|
|
|
/* Estrangela Syriac */
|
|
|
|
test_tag_from_language ("SYRE", "aii-Syre");
|
|
|
|
test_tag_from_language ("SYRE", "de-Syre");
|
|
|
|
test_tag_from_language ("SYRE", "syr-Syre");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("SYRE", "und-Syre");
|
2017-01-18 13:51:02 +01:00
|
|
|
|
|
|
|
/* Western Syriac */
|
|
|
|
test_tag_from_language ("SYRJ", "aii-Syrj");
|
|
|
|
test_tag_from_language ("SYRJ", "de-Syrj");
|
|
|
|
test_tag_from_language ("SYRJ", "syr-Syrj");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("SYRJ", "und-Syrj");
|
2017-01-18 13:51:02 +01:00
|
|
|
|
|
|
|
/* Eastern Syriac */
|
|
|
|
test_tag_from_language ("SYRN", "aii-Syrn");
|
|
|
|
test_tag_from_language ("SYRN", "de-Syrn");
|
|
|
|
test_tag_from_language ("SYRN", "syr-Syrn");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_language_two_way ("SYRN", "und-Syrn");
|
2017-01-18 13:51:02 +01:00
|
|
|
|
2011-05-14 05:04:46 +02:00
|
|
|
/* Test that x-hbot overrides the base language */
|
|
|
|
test_tag_from_language ("ABC", "fa-x-hbotabc-zxc");
|
|
|
|
test_tag_from_language ("ABC", "fa-ir-x-hbotabc-zxc");
|
|
|
|
test_tag_from_language ("ABC", "zh-x-hbotabc-zxc");
|
|
|
|
test_tag_from_language ("ABC", "zh-cn-x-hbotabc-zxc");
|
|
|
|
test_tag_from_language ("ABC", "zh-xy-x-hbotabc-zxc");
|
2011-09-02 19:31:19 +02:00
|
|
|
test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
|
|
|
|
/* Unnormalized BCP 47 tags */
|
2018-01-20 21:53:09 +01:00
|
|
|
test_tag_from_language ("ARA", "ar-aao");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tag_from_language ("JBO", "art-lojban");
|
2018-01-20 21:53:09 +01:00
|
|
|
test_tag_from_language ("KOK", "kok-gom");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tag_from_language ("LTZ", "i-lux");
|
|
|
|
test_tag_from_language ("MNG", "drh");
|
2018-01-20 21:53:09 +01:00
|
|
|
test_tag_from_language ("MOR", "ar-ary");
|
|
|
|
test_tag_from_language ("MOR", "ar-ary-DZ");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tag_from_language ("NOR", "no-bok");
|
|
|
|
test_tag_from_language ("NYN", "no-nyn");
|
|
|
|
test_tag_from_language ("ZHS", "i-hak");
|
|
|
|
test_tag_from_language ("ZHS", "zh-guoyu");
|
|
|
|
test_tag_from_language ("ZHS", "zh-min");
|
|
|
|
test_tag_from_language ("ZHS", "zh-min-nan");
|
|
|
|
test_tag_from_language ("ZHS", "zh-xiang");
|
2018-01-20 21:53:09 +01:00
|
|
|
|
|
|
|
/* A UN M.49 region code, not an extended language subtag */
|
|
|
|
test_tag_from_language ("ARA", "ar-001");
|
2011-05-14 04:46:36 +02:00
|
|
|
}
|
|
|
|
|
2017-12-08 17:21:14 +01:00
|
|
|
static void
|
|
|
|
test_tags (hb_script_t script,
|
|
|
|
const char *lang_s,
|
|
|
|
unsigned int script_count,
|
|
|
|
unsigned int language_count,
|
|
|
|
unsigned int expected_script_count,
|
|
|
|
unsigned int expected_language_count,
|
|
|
|
...)
|
|
|
|
{
|
|
|
|
va_list expected_tags;
|
|
|
|
unsigned int i;
|
|
|
|
hb_tag_t *script_tags = malloc (script_count * sizeof (hb_tag_t));
|
|
|
|
hb_tag_t *language_tags = malloc (language_count * sizeof (hb_tag_t));
|
|
|
|
g_assert_nonnull (script_tags);
|
|
|
|
g_assert_nonnull (language_tags);
|
|
|
|
hb_language_t lang = hb_language_from_string (lang_s, -1);
|
|
|
|
va_start (expected_tags, expected_language_count);
|
|
|
|
|
2018-07-24 03:19:23 +02:00
|
|
|
hb_ot_tags_from_script_and_language (script, lang, &script_count, script_tags, &language_count, language_tags);
|
2017-12-08 17:21:14 +01:00
|
|
|
|
|
|
|
g_assert_cmpuint (script_count, ==, expected_script_count);
|
|
|
|
g_assert_cmpuint (language_count, ==, expected_language_count);
|
|
|
|
|
|
|
|
for (i = 0; i < script_count + language_count; i++)
|
|
|
|
{
|
|
|
|
hb_tag_t expected_tag = hb_tag_from_string (va_arg (expected_tags, const char *), -1);
|
|
|
|
hb_tag_t actual_tag = i < script_count ? script_tags[i] : language_tags[i - script_count];
|
|
|
|
g_assert_cmphex (actual_tag, ==, expected_tag);
|
|
|
|
}
|
|
|
|
|
|
|
|
free (script_tags);
|
|
|
|
free (language_tags);
|
|
|
|
va_end (expected_tags);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
test_ot_tag_full (void)
|
|
|
|
{
|
|
|
|
test_tags (HB_SCRIPT_INVALID, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "ENG");
|
2018-07-24 03:19:23 +02:00
|
|
|
test_tags (HB_SCRIPT_INVALID, "en-x-hbscdflt", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "DFLT", "ENG");
|
2017-12-08 17:21:14 +01:00
|
|
|
test_tags (HB_SCRIPT_LATIN, "en", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "latn", "ENG");
|
|
|
|
test_tags (HB_SCRIPT_LATIN, "en", 0, 0, 0, 0);
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tags (HB_SCRIPT_INVALID, "und-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH");
|
2017-12-08 17:21:14 +01:00
|
|
|
test_tags (HB_SCRIPT_INVALID, "en-fonnapa", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "APPH");
|
|
|
|
test_tags (HB_SCRIPT_INVALID, "x-hbot1234-hbsc5678", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "5678", "1234");
|
2018-07-24 03:19:23 +02:00
|
|
|
test_tags (HB_SCRIPT_INVALID, "x-hbsc5678-hbot1234", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 1, 1, "5678", "1234");
|
Autogenerate the BCP 47 to OpenType mappings
The new script, gen-tag-table.py, generates `ot_languages` automatically
from the [OpenType language system tag registry][ot] and the [IANA
Language Subtag Registry][bcp47] with some manual modifications. If an
OpenType tag maps to a BCP 47 macrolanguage, all the macrolanguage's
individual languages are mapped to the same OpenType tag, except for
individual languages with their own OpenType mappings. Deprecated
BCP 47 tags are canonicalized.
[ot]: https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags
[bcp47]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
Some OpenType tags correspond to multiple ISO 639 codes. The mapping
from ISO 639 codes lists OpenType tags in priority order, such that more
specific or more likely tags appear first.
Some OpenType tags have no corresponding ISO 639 code in the registry so
their mappings use BCP 47 subtags besides the language. For example, any
BCP 47 tag with a fonipa variant subtag is mapped to 'IPPH', and 'IPPH'
is mapped back to und-fonipa.
Other OpenType tags have no corresponding ISO 639 code because it is not
clear what they are for. HarfBuzz just ignores these tags.
One such ignored tag is 'ZHP ' (Chinese Phonetic). It probably means
zh-Latn. However, it is used in Microsoft JhengHei and Microsoft YaHei
with the script tag 'hani', implying that it is not a romanization
scheme after all. It would be simple enough to add this mapping to
gen-tag-table.py once a definitive mapping is determined.
The manual modifications are mainly either obvious mappings that the
OpenType registry omits or mappings for compatibility with previous
versions of HarfBuzz. Some of the old mappings were discarded, though,
for homophonous language names. For example, OpenType maps 'KUI ' to
kxu; previous versions of HarfBuzz also mapped it to kvd, because kvd
and kxu both happen to be called "Kui".
gen-tag-table.py also generates a function to convert multi-subtag tags
like el-polyton and zh-HK to OpenType tags, replacing `ot_languages_zh`
and the hard-coded list of special cases in `hb_ot_tags_from_language`.
It also generates a function to convert OpenType tags to BCP 47,
replacing the hard-coded list of special cases in
`hb_ot_tag_to_language`.
2017-12-09 04:45:52 +01:00
|
|
|
test_tags (HB_SCRIPT_MALAYALAM, "ml", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 2, 2, "mlm2", "mlym", "MAL", "MLR");
|
|
|
|
test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MAL");
|
2017-12-08 17:21:14 +01:00
|
|
|
test_tags (HB_SCRIPT_INVALID, "xyz", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 1, "XYZ");
|
|
|
|
test_tags (HB_SCRIPT_INVALID, "xy", HB_OT_MAX_TAGS_PER_SCRIPT, HB_OT_MAX_TAGS_PER_LANGUAGE, 0, 0);
|
|
|
|
}
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
int
|
|
|
|
main (int argc, char **argv)
|
|
|
|
{
|
|
|
|
hb_test_init (&argc, &argv);
|
|
|
|
|
|
|
|
hb_test_add (test_ot_tag_script_degenerate);
|
|
|
|
hb_test_add (test_ot_tag_script_simple);
|
2017-12-08 17:21:14 +01:00
|
|
|
hb_test_add (test_ot_tag_script_from_language);
|
2011-05-14 04:46:36 +02:00
|
|
|
hb_test_add (test_ot_tag_script_indic);
|
|
|
|
|
2018-07-24 03:19:23 +02:00
|
|
|
hb_test_add (test_ot_tags_to_script_and_language);
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
hb_test_add (test_ot_tag_language);
|
|
|
|
|
2017-12-08 17:21:14 +01:00
|
|
|
hb_test_add (test_ot_tag_full);
|
|
|
|
|
2011-05-14 04:46:36 +02:00
|
|
|
return hb_test_run();
|
|
|
|
}
|