[test/test-ot-tag] Test hb-ot-tag.h, fix many bugs

I'm in awe of how many bugs this test revealed.  All fixed.
This commit is contained in:
Behdad Esfahbod 2011-05-13 22:46:36 -04:00
parent 1368018b47
commit 40b5c2e86c
3 changed files with 325 additions and 68 deletions

View File

@ -40,7 +40,12 @@ static hb_tag_t
hb_ot_old_tag_from_script (hb_script_t script) hb_ot_old_tag_from_script (hb_script_t script)
{ {
switch ((hb_tag_t) script) { switch ((hb_tag_t) script) {
case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
/* KATAKANA and HIRAGANA both map to 'kana' */
case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a');
/* Spaces at the end are preserved, unlike ISO 15924 */
case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' '); case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' ');
case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' '); case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' ');
/* Unicode-5.0 additions */ /* Unicode-5.0 additions */
@ -48,7 +53,6 @@ hb_ot_old_tag_from_script (hb_script_t script)
/* Unicode-5.1 additions */ /* Unicode-5.1 additions */
case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' '); case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' ');
/* Unicode-5.2 additions */ /* Unicode-5.2 additions */
case HB_SCRIPT_MEETEI_MAYEK: return HB_TAG('m','y','e','i');
/* Unicode-6.0 additions */ /* Unicode-6.0 additions */
} }
@ -59,20 +63,19 @@ hb_ot_old_tag_from_script (hb_script_t script)
static hb_script_t static hb_script_t
hb_ot_old_tag_to_script (hb_tag_t tag) hb_ot_old_tag_to_script (hb_tag_t tag)
{ {
switch (tag) { if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
case HB_TAG('k','a','n','a'): return HB_SCRIPT_HIRAGANA; return HB_SCRIPT_INVALID;
case HB_TAG('l','a','o',' '): return HB_SCRIPT_LAO;
case HB_TAG('y','i',' ',' '): return HB_SCRIPT_YI;
/* Unicode-5.0 additions */
case HB_TAG('n','k','o',' '): return HB_SCRIPT_NKO;
/* Unicode-5.1 additions */
case HB_TAG('v','a','i',' '): return HB_SCRIPT_VAI;
/* Unicode-5.2 additions */
case HB_TAG('m','y','e','i'): return HB_SCRIPT_MEETEI_MAYEK;
/* Unicode-6.0 additions */
}
/* Else, just change first char to uppercase and return */ /* This side of the conversion is fully algorithmic. */
/* Any spaces at the end of the tag are replaced by repeating the last
* letter. Eg 'nko ' -> 'Nkoo' */
if (unlikely ((tag & 0x0000FF00) == 0x00002000))
tag |= (tag >> 8) & 0x0000FF00; /* Copy second letter to third */
if (unlikely ((tag & 0x000000FF) == 0x00000020))
tag |= (tag >> 8) & 0x000000FF; /* Copy third letter to fourth */
/* Change first char to uppercase and return */
return (hb_script_t) (tag & ~0x20000000); return (hb_script_t) (tag & ~0x20000000);
} }
@ -91,7 +94,7 @@ hb_ot_new_tag_from_script (hb_script_t script)
case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2'); case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2');
} }
return HB_TAG_NONE; return HB_OT_TAG_DEFAULT_SCRIPT;
} }
static hb_script_t static hb_script_t
@ -114,7 +117,8 @@ hb_ot_new_tag_to_script (hb_tag_t tag)
/* /*
* Complete list at: * Complete list at:
* http://www.microsoft.com/typography/otspec/scripttags.htm * https://www.microsoft.com/typography/otspec/scripttags.htm
* https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm
* *
* Most of the script tags are the same as the ISO 15924 tag but lowercased. * Most of the script tags are the same as the ISO 15924 tag but lowercased.
* So we just do that, and handle the exceptional cases in a switch. * So we just do that, and handle the exceptional cases in a switch.
@ -127,11 +131,11 @@ hb_ot_tags_from_script (hb_script_t script,
{ {
hb_tag_t new_tag; hb_tag_t new_tag;
*script_tag_2 = HB_TAG_NONE; *script_tag_2 = HB_OT_TAG_DEFAULT_SCRIPT;
*script_tag_1 = hb_ot_old_tag_from_script (script); *script_tag_1 = hb_ot_old_tag_from_script (script);
new_tag = hb_ot_new_tag_from_script (script); new_tag = hb_ot_new_tag_from_script (script);
if (unlikely (new_tag != HB_TAG_NONE)) { if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT)) {
*script_tag_2 = *script_tag_1; *script_tag_2 = *script_tag_1;
*script_tag_1 = new_tag; *script_tag_1 = new_tag;
} }
@ -165,6 +169,7 @@ typedef struct {
* Many items still missing. Those are commented out at the end. * Many items still missing. Those are commented out at the end.
* Keep sorted for bsearch. * Keep sorted for bsearch.
*/ */
static const LangTag ot_languages[] = { static const LangTag ot_languages[] = {
{"aa", HB_TAG('A','F','R',' ')}, /* Afar */ {"aa", HB_TAG('A','F','R',' ')}, /* Afar */
{"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */
@ -451,11 +456,6 @@ static const LangTag ot_languages[] = {
{"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */ {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish */
{"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */
{"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */ {"yso", HB_TAG('N','I','S',' ')}, /* Nisi (China) */
{"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */
{"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
{"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */
{"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */
{"zh-tw", HB_TAG('Z','H','T',' ')}, /* Chinese (Taiwan) */
{"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */
{"zu", HB_TAG('Z','U','L',' ')} /* Zulu */ {"zu", HB_TAG('Z','U','L',' ')} /* Zulu */
@ -571,6 +571,14 @@ static const LangTag ot_languages[] = {
/*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */ /*{"??", HB_TAG('Z','H','P',' ')},*/ /* Chinese Phonetic */
}; };
static const LangTag ot_languages_zh[] = {
{"zh-cn", HB_TAG('Z','H','S',' ')}, /* Chinese (China) */
{"zh-hk", HB_TAG('Z','H','H',' ')}, /* Chinese (Hong Kong) */
{"zh-mo", HB_TAG('Z','H','T',' ')}, /* Chinese (Macao) */
{"zh-sg", HB_TAG('Z','H','S',' ')}, /* Chinese (Singapore) */
{"zh-tw", HB_TAG('Z','H','T',' ')} /* Chinese (Taiwan) */
};
static int static int
lang_compare_first_component (const char *a, lang_compare_first_component (const char *a,
const char *b) const char *b)
@ -592,66 +600,58 @@ lang_matches (const char *lang_str, const char *spec)
{ {
unsigned int len = strlen (spec); unsigned int len = strlen (spec);
return lang_str && strncmp (lang_str, spec, len) == 0 && return strncmp (lang_str, spec, len) == 0 &&
(lang_str[len] == '\0' || lang_str[len] == '-'); (lang_str[len] == '\0' || lang_str[len] == '-');
} }
hb_tag_t hb_tag_t
hb_ot_tag_from_language (hb_language_t language) hb_ot_tag_from_language (hb_language_t language)
{ {
const char *lang_str; const char *lang_str, *s;
LangTag *lang_tag; const LangTag *lang_tag;
if (language == NULL) if (language == NULL)
return HB_OT_TAG_DEFAULT_LANGUAGE; return HB_OT_TAG_DEFAULT_LANGUAGE;
lang_str = hb_language_to_string (language); lang_str = hb_language_to_string (language);
if (0 == strncmp (lang_str, "x-hbot", 6)) { s = strstr (lang_str, "x-hbot");
if (s) {
char tag[4]; char tag[4];
int i; int i;
lang_str += 6; s += 6;
for (i = 0; i < 4 && ISALPHA (lang_str[i]); i++) for (i = 0; i < 4 && ISALPHA (s[i]); i++)
tag[i] = TOUPPER (lang_str[i]); tag[i] = TOUPPER (s[i]);
if (i) {
for (; i < 4; i++) for (; i < 4; i++)
tag[i] = ' '; tag[i] = ' ';
return HB_TAG_CHAR4 (tag); return HB_TAG_CHAR4 (tag);
} }
}
/* find a language matching in the first component */ /* Find a language matching in the first component */
lang_tag = (LangTag *) bsearch (lang_str, ot_languages, lang_tag = (LangTag *) bsearch (lang_str, ot_languages,
ARRAY_LENGTH (ot_languages), sizeof (LangTag), ARRAY_LENGTH (ot_languages), sizeof (LangTag),
(hb_compare_func_t) lang_compare_first_component); (hb_compare_func_t) lang_compare_first_component);
/* we now need to find the best language matching */
if (lang_tag)
{
hb_bool_t found = FALSE;
/* go to the final one matching in the first component */
while (lang_tag + 1 < ot_languages + ARRAY_LENGTH (ot_languages) &&
lang_compare_first_component (lang_str, (lang_tag + 1)->language) == 0)
lang_tag++;
/* go back, find which one matches completely */
while (lang_tag >= ot_languages &&
lang_compare_first_component (lang_str, lang_tag->language) == 0)
{
if (lang_matches (lang_str, lang_tag->language)) {
found = TRUE;
break;
}
lang_tag--;
}
if (!found)
lang_tag = NULL;
}
if (lang_tag) if (lang_tag)
return lang_tag->tag; return lang_tag->tag;
/* Otherwise, check the Chinese ones */
if (0 == lang_compare_first_component (lang_str, "zh"))
{
unsigned int i;
for (i = 0; i < ARRAY_LENGTH (ot_languages_zh); i++)
{
lang_tag = &ot_languages_zh[i];
if (lang_matches (lang_tag->language, lang_str))
return lang_tag->tag;
}
/* Otherwise just return 'ZHS ' */
return HB_TAG('Z','H','S',' ');
}
return HB_OT_TAG_DEFAULT_LANGUAGE; return HB_OT_TAG_DEFAULT_LANGUAGE;
} }
@ -659,18 +659,45 @@ hb_language_t
hb_ot_tag_to_language (hb_tag_t tag) hb_ot_tag_to_language (hb_tag_t tag)
{ {
unsigned int i; unsigned int i;
unsigned char buf[11] = "x-hbot";
if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
return NULL;
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
if (ot_languages[i].tag == tag) if (ot_languages[i].tag == tag)
return hb_language_from_string (ot_languages[i].language); return hb_language_from_string (ot_languages[i].language);
/* If tag starts with ZH, it's Chinese */
if ((tag & 0xFFFF0000) == 0x5A480000) {
switch (tag) {
case HB_TAG('Z','H','H',' '): return hb_language_from_string ("zh-hk"); /* Hong Kong */
default: {
/* Encode the tag... */
unsigned char buf[14] = "zh-x-hbot";
buf[9] = tag >> 24;
buf[10] = (tag >> 16) & 0xFF;
buf[11] = (tag >> 8) & 0xFF;
buf[12] = tag & 0xFF;
if (buf[12] == 0x20)
buf[12] = '\0';
buf[13] = '\0';
return hb_language_from_string ((char *) buf);
}
}
}
/* Else return a custom language in the form of "x-hbotXXXX" */
{
unsigned char buf[11] = "x-hbot";
buf[6] = tag >> 24; buf[6] = tag >> 24;
buf[7] = (tag >> 16) & 0xFF; buf[7] = (tag >> 16) & 0xFF;
buf[8] = (tag >> 8) & 0xFF; buf[8] = (tag >> 8) & 0xFF;
buf[9] = tag & 0xFF; buf[9] = tag & 0xFF;
if (buf[9] == 0x20)
buf[9] = '\0';
buf[10] = '\0'; buf[10] = '\0';
return hb_language_from_string ((char *) buf); return hb_language_from_string ((char *) buf);
}
} }

View File

@ -21,6 +21,9 @@ TEST_PROGS += \
test-unicode \ test-unicode \
$(NULL) $(NULL)
TEST_PROGS += \
test-ot-tag \
$(NULL)
# Tests for header compilation # Tests for header compilation
TEST_PROGS += \ TEST_PROGS += \

227
test/test-ot-tag.c Normal file
View File

@ -0,0 +1,227 @@
/*
* Copyright © 2011 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#include "hb-test.h"
#include <hb-ot.h>
/* Unit tests for hb-ot-tag.h */
/* https://www.microsoft.com/typography/otspec/scripttags.htm */
/*
 * Check a script that is expected to map to exactly one OpenType script tag.
 *
 * s:      the expected OpenType script tag, as a string.
 * script: the script to convert.
 *
 * Asserts that the script converts to `s` as its first tag with
 * HB_OT_TAG_DEFAULT_SCRIPT as its second, and that `s` converts back
 * to `script`.
 */
static void
test_simple_tags (const char *s, hb_script_t script)
{
  /* These hold OpenType tags, so they are hb_tag_t, not hb_script_t;
   * hb_ot_tags_from_script() stores tag values through its out pointers. */
  hb_tag_t tag;
  hb_tag_t t1, t2;

  g_test_message ("Testing script %c%c%c%c: tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s);
  tag = hb_tag_from_string (s);

  hb_ot_tags_from_script (script, &t1, &t2);

  g_assert_cmphex (t1, ==, tag);
  g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);

  /* And the reverse direction. */
  g_assert_cmphex (hb_ot_tag_to_script (tag), ==, script);
}
/*
 * Check a script that is expected to map to two OpenType script tags.
 *
 * s1:     the expected first (new-style) tag, as a string.
 * s2:     the expected second (old-style) tag, as a string.
 * script: the script to convert.
 *
 * Asserts that the script converts to (s1, s2) in that order, and that
 * both tags convert back to `script`.
 */
static void
test_indic_tags (const char *s1, const char *s2, hb_script_t script)
{
  /* These hold OpenType tags, so they are hb_tag_t, not hb_script_t;
   * hb_ot_tags_from_script() stores tag values through its out pointers. */
  hb_tag_t tag1, tag2;
  hb_tag_t t1, t2;

  g_test_message ("Testing script %c%c%c%c: new tag %s, old tag %s", HB_UNTAG (hb_script_to_iso15924_tag (script)), s1, s2);
  tag1 = hb_tag_from_string (s1);
  tag2 = hb_tag_from_string (s2);

  hb_ot_tags_from_script (script, &t1, &t2);

  g_assert_cmphex (t1, ==, tag1);
  g_assert_cmphex (t2, ==, tag2);

  /* Both the new and the old tag must map back to the same script. */
  g_assert_cmphex (hb_ot_tag_to_script (tag1), ==, script);
  g_assert_cmphex (hb_ot_tag_to_script (tag2), ==, script);
}
static void
test_ot_tag_script_degenerate (void)
{
hb_script_t t1, t2;
g_assert_cmphex (HB_TAG_CHAR4 ("DFLT"), ==, HB_OT_TAG_DEFAULT_SCRIPT);
/* HIRAGANA and KATAKANA both map to 'kana' */
test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
hb_ot_tags_from_script (HB_SCRIPT_HIRAGANA, &t1, &t2);
g_assert_cmphex (t1, ==, HB_TAG_CHAR4 ("kana"));
g_assert_cmphex (t2, ==, HB_OT_TAG_DEFAULT_SCRIPT);
test_simple_tags ("DFLT", HB_SCRIPT_INVALID);
/* Spaces are replaced */
g_assert_cmphex (hb_ot_tag_to_script (HB_TAG_CHAR4 ("be ")), ==, hb_script_from_string ("Beee"));
}
static void
test_ot_tag_script_simple (void)
{
/* Arbitrary non-existent script */
test_simple_tags ("wwyz", hb_script_from_string ("wWyZ"));
/* These we don't really care about */
test_simple_tags ("zyyy", HB_SCRIPT_COMMON);
test_simple_tags ("zinh", HB_SCRIPT_INHERITED);
test_simple_tags ("zzzz", HB_SCRIPT_UNKNOWN);
test_simple_tags ("arab", HB_SCRIPT_ARABIC);
test_simple_tags ("copt", HB_SCRIPT_COPTIC);
test_simple_tags ("kana", HB_SCRIPT_KATAKANA);
test_simple_tags ("latn", HB_SCRIPT_LATIN);
/* These are trickier since their OT script tags have space. */
test_simple_tags ("lao ", HB_SCRIPT_LAO);
test_simple_tags ("yi ", HB_SCRIPT_YI);
/* Unicode-5.0 additions */
test_simple_tags ("nko ", HB_SCRIPT_NKO);
/* Unicode-5.1 additions */
test_simple_tags ("vai ", HB_SCRIPT_VAI);
/* https://www.microsoft.com/typography/otspec160/scripttagsProposed.htm */
/* Unicode-5.2 additions */
test_simple_tags ("mtei", HB_SCRIPT_MEETEI_MAYEK);
/* Unicode-6.0 additions */
test_simple_tags ("mand", HB_SCRIPT_MANDAIC);
}
static void
test_ot_tag_script_indic (void)
{
test_indic_tags ("bng2", "beng", HB_SCRIPT_BENGALI);
test_indic_tags ("dev2", "deva", HB_SCRIPT_DEVANAGARI);
test_indic_tags ("gjr2", "gujr", HB_SCRIPT_GUJARATI);
test_indic_tags ("gur2", "guru", HB_SCRIPT_GURMUKHI);
test_indic_tags ("knd2", "knda", HB_SCRIPT_KANNADA);
test_indic_tags ("mlm2", "mlym", HB_SCRIPT_MALAYALAM);
test_indic_tags ("ory2", "orya", HB_SCRIPT_ORIYA);
test_indic_tags ("tml2", "taml", HB_SCRIPT_TAMIL);
test_indic_tags ("tel2", "telu", HB_SCRIPT_TELUGU);
}
/* https://www.microsoft.com/typography/otspec/languagetags.htm */
/*
 * Check that a language and an OpenType language tag convert to each
 * other in both directions.
 *
 * tag_s:  the OpenType language tag, as a string.
 * lang_s: the language string; may be NULL.
 */
static void
test_language_two_way (const char *tag_s, const char *lang_s)
{
  hb_language_t lang = hb_language_from_string (lang_s);
  hb_tag_t tag = hb_tag_from_string (tag_s);

  /* lang_s may be NULL, and handing NULL to a %s conversion is undefined
   * behaviour, so substitute a printable placeholder for the log line. */
  g_test_message ("Testing language %s <-> tag %s", lang_s ? lang_s : "(null)", tag_s);

  g_assert_cmphex (tag, ==, hb_ot_tag_from_language (lang));
  g_assert (lang == hb_ot_tag_to_language (tag));
}
/*
 * One-way check: the language given by lang_s must convert to the
 * OpenType language tag given by tag_s.
 */
static void
test_tag_from_language (const char *tag_s, const char *lang_s)
{
  hb_language_t language;
  hb_tag_t expected;

  language = hb_language_from_string (lang_s);
  expected = hb_tag_from_string (tag_s);

  g_test_message ("Testing language %s -> tag %s", lang_s, tag_s);

  g_assert_cmphex (expected, ==, hb_ot_tag_from_language (language));
}
/*
 * One-way check: the OpenType language tag given by tag_s must convert
 * to the language given by lang_s.
 */
static void
test_tag_to_language (const char *tag_s, const char *lang_s)
{
  hb_language_t expected;
  hb_tag_t ot_tag;

  expected = hb_language_from_string (lang_s);
  ot_tag = hb_tag_from_string (tag_s);

  g_test_message ("Testing tag %s -> language %s", tag_s, lang_s);

  g_assert (expected == hb_ot_tag_to_language (ot_tag));
}
static void
test_ot_tag_language (void)
{
g_assert_cmphex (HB_TAG_CHAR4 ("dflt"), ==, HB_OT_TAG_DEFAULT_LANGUAGE);
test_language_two_way ("dflt", NULL);
test_language_two_way ("ARA", "ar");
test_language_two_way ("AZE", "az");
test_tag_from_language ("AZE", "az-ir");
test_tag_from_language ("AZE", "az-az");
test_language_two_way ("ENG", "en");
test_tag_from_language ("ENG", "en_US");
test_language_two_way ("EVN", "eve");
test_language_two_way ("FAR", "fa");
test_tag_from_language ("FAR", "fa_IR");
test_language_two_way ("ZHH", "zh-hk"); /* Chinese (Hong Kong) */
test_tag_from_language ("ZHS", "zh-cn"); /* Chinese (China) */
test_tag_from_language ("ZHS", "zh-sg"); /* Chinese (Singapore) */
test_tag_from_language ("ZHT", "zh-mo"); /* Chinese (Macao) */
test_tag_from_language ("ZHT", "zh-tw"); /* Chinese (Taiwan) */
test_tag_from_language ("ZHS", "zh"); /* Chinese */
test_tag_from_language ("ZHS", "zh-xx");
test_tag_to_language ("ZHS", "zh-x-hbotzhs");
test_tag_to_language ("ZHT", "zh-x-hbotzht");
test_tag_to_language ("ZHP", "zh-x-hbotzhp");
test_language_two_way ("ABC", "x-hbotabc");
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc");
test_tag_from_language ("dflt", "asdf-asdf-wer-x-hbot-zxc");
}
/*
 * Test program entry point: initialise the test harness, register the
 * test functions in order, then run them and return the harness result.
 */
int
main (int argc, char **argv)
{
  int status;

  hb_test_init (&argc, &argv);

  /* Script tag tests. */
  hb_test_add (test_ot_tag_script_degenerate);
  hb_test_add (test_ot_tag_script_simple);
  hb_test_add (test_ot_tag_script_indic);

  /* Language tag tests. */
  hb_test_add (test_ot_tag_language);

  status = hb_test_run();
  return status;
}