Comment out ot_languages where fallback suffices

This commit is contained in:
David Corbett 2019-04-16 17:29:34 -04:00 committed by Behdad Esfahbod
parent 5daeff3e68
commit 6745a600bf
4 changed files with 255 additions and 227 deletions

View File

@ -895,11 +895,18 @@ def language_name_intersection (a, b):
def get_matching_language_name (intersection, candidates):
    """Return the first candidate whose variant set overlaps ``intersection``.

    Each entry of ``candidates`` is passed to ``get_variant_set`` in order;
    the first entry whose result is not disjoint from ``intersection`` is
    returned.

    Raises:
        StopIteration: if no candidate overlaps ``intersection``.
    """
    for name in candidates:
        if not intersection.isdisjoint (get_variant_set (name)):
            return name
    # Same failure mode as calling next() on an exhausted iterator.
    raise StopIteration
def same_tag (bcp_47_tag, ot_tags):
    """Tell whether a BCP 47 tag and its OpenType tags are the same tag.

    They count as the same when ``bcp_47_tag`` is exactly three characters
    long, ``ot_tags`` holds exactly one tag, and that tag lower-cased is equal
    to ``bcp_47_tag``.

    Args:
        bcp_47_tag: The BCP 47 language tag to test.
        ot_tags: Indexable sequence of OpenType tags for that language. May be
            ``None`` or empty, e.g. when it comes from a ``dict.get`` lookup
            that found nothing.

    Returns:
        ``True`` if the single OpenType tag is just the upper-case spelling of
        the BCP 47 tag, ``False`` otherwise (including when there are no
        OpenType tags at all).
    """
    if not ot_tags:
        # A missing or empty tag list cannot match; previously ``None`` made
        # ``len (ot_tags)`` raise TypeError for three-letter tags.
        return False
    return len (bcp_47_tag) == 3 and len (ot_tags) == 1 and bcp_47_tag == ot_tags[0].lower ()
for language, tags in sorted (ot.from_bcp_47.items ()): for language, tags in sorted (ot.from_bcp_47.items ()):
if language == '' or '-' in language: if language == '' or '-' in language:
continue continue
commented_out = same_tag (language, tags)
for i, tag in enumerate (tags, start=1): for i, tag in enumerate (tags, start=1):
print (' {\"%s\",\t%s},\t/* ' % (language, hb_tag (tag)), end='') print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='')
if commented_out:
print ('*/', end='')
print ('\t/* ', end='')
bcp_47_name = bcp_47.names.get (language, '') bcp_47_name = bcp_47.names.get (language, '')
bcp_47_name_candidates = bcp_47_name.split ('\n') bcp_47_name_candidates = bcp_47_name.split ('\n')
intersection = language_name_intersection (bcp_47_name, ot.names[tag]) intersection = language_name_intersection (bcp_47_name, ot.names[tag])
@ -1040,7 +1047,8 @@ print (' * @tag: A language tag.')
print (' *') print (' *')
print (' * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to') print (' * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to')
print (' * many language tags) and the best tag is not the alphabetically first, or if') print (' * many language tags) and the best tag is not the alphabetically first, or if')
print (' * the best tag consists of multiple subtags.') print (' * the best tag consists of multiple subtags, or if the best tag does not appear')
print (' * in #ot_languages.')
print (' *') print (' *')
print (' * Return value: The #hb_language_t corresponding to the BCP 47 language tag,') print (' * Return value: The #hb_language_t corresponding to the BCP 47 language tag,')
print (' * or #HB_LANGUAGE_INVALID if @tag is not ambiguous.') print (' * or #HB_LANGUAGE_INVALID if @tag is not ambiguous.')
@ -1091,7 +1099,8 @@ def verify_disambiguation_dict ():
'%s is not a valid disambiguation for %s' % (disambiguation[ot_tag], ot_tag)) '%s is not a valid disambiguation for %s' % (disambiguation[ot_tag], ot_tag))
elif ot_tag not in disambiguation: elif ot_tag not in disambiguation:
disambiguation[ot_tag] = macrolanguages[0] disambiguation[ot_tag] = macrolanguages[0]
if disambiguation[ot_tag] == sorted (primary_tags)[0] and '-' not in disambiguation[ot_tag]: different_primary_tags = sorted (t for t in primary_tags if not same_tag (t, ot.from_bcp_47.get (t)))
if different_primary_tags and disambiguation[ot_tag] == different_primary_tags[0] and '-' not in disambiguation[ot_tag]:
del disambiguation[ot_tag] del disambiguation[ot_tag]
for ot_tag in disambiguation.keys (): for ot_tag in disambiguation.keys ():
expect (ot_tag in ot.to_bcp_47, 'unknown OT tag: %s' % ot_tag) expect (ot_tag in ot.to_bcp_47, 'unknown OT tag: %s' % ot_tag)

File diff suppressed because it is too large Load Diff

View File

@ -426,17 +426,30 @@ hb_ot_tag_to_language (hb_tag_t tag)
if (ot_languages[i].tag == tag) if (ot_languages[i].tag == tag)
return hb_language_from_string (ot_languages[i].language, -1); return hb_language_from_string (ot_languages[i].language, -1);
/* Else return a custom language in the form of "x-hbotABCD" */ /* If it's three letters long, assume it's ISO 639-3 and lower-case and use it
* (if it's not a registered tag, calling hb_ot_tag_from_language on the
* result might not return the same tag as the original tag).
* Else return a custom language in the form of "x-hbotABCD". */
{ {
unsigned char buf[11] = "x-hbot"; char buf[11] = "x-hbot";
char *str = buf;
buf[6] = tag >> 24; buf[6] = tag >> 24;
buf[7] = (tag >> 16) & 0xFF; buf[7] = (tag >> 16) & 0xFF;
buf[8] = (tag >> 8) & 0xFF; buf[8] = (tag >> 8) & 0xFF;
buf[9] = tag & 0xFF; buf[9] = tag & 0xFF;
if (buf[9] == 0x20) if (buf[9] == 0x20)
{
buf[9] = '\0'; buf[9] = '\0';
if (ISALPHA (buf[6]) && ISALPHA (buf[7]) && ISALPHA (buf[8]))
{
buf[6] = TOLOWER (buf[6]);
buf[7] = TOLOWER (buf[7]);
buf[8] = TOLOWER (buf[8]);
str += 6;
}
}
buf[10] = '\0'; buf[10] = '\0';
return hb_language_from_string ((char *) buf, -1); return hb_language_from_string (str, -1);
} }
} }

View File

@ -281,6 +281,8 @@ test_ot_tag_language (void)
g_assert_cmphex (HB_TAG_CHAR4 ("dflt"), ==, HB_OT_TAG_DEFAULT_LANGUAGE); g_assert_cmphex (HB_TAG_CHAR4 ("dflt"), ==, HB_OT_TAG_DEFAULT_LANGUAGE);
test_language_two_way ("dflt", NULL); test_language_two_way ("dflt", NULL);
test_language_two_way ("ALT", "alt");
test_language_two_way ("ARA", "ar"); test_language_two_way ("ARA", "ar");
test_language_two_way ("AZE", "az"); test_language_two_way ("AZE", "az");
@ -353,7 +355,8 @@ test_ot_tag_language (void)
test_tag_from_language ("ZHS", "zh"); /* Chinese */ test_tag_from_language ("ZHS", "zh"); /* Chinese */
test_tag_from_language ("ZHS", "zh-xx"); test_tag_from_language ("ZHS", "zh-xx");
test_language_two_way ("ABC", "x-hbotabc"); test_language_two_way ("ABC", "abc");
test_language_two_way ("ABCD", "x-hbotabcd");
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc-zxc");
test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc"); test_tag_from_language ("ABC", "asdf-asdf-wer-x-hbotabc");
test_tag_from_language ("ABCD", "asdf-asdf-wer-x-hbotabcd"); test_tag_from_language ("ABCD", "asdf-asdf-wer-x-hbotabcd");