Refactor the selection of script and language tags

The old hb-ot-tag.cc functions, `hb_ot_tags_from_script` and
`hb_ot_tag_from_language`, are now wrappers around a new function:
`hb_ot_tags`. It converts a script and a language to arrays of script
tags and language tags. This will make it easier to add new script tags
to scripts, like 'dev3'. It also allows for language fallback chains;
nothing produces more than one language yet though.

Where the old functions return the default tags 'DFLT' and 'dflt',
`hb_ot_tags` returns an empty array. The caller is responsible for
using the default tag in that case.

The new function also adds a new private use subtag syntax for script
overrides: "x-hbscabcd" requests a script tag of 'abcd'.

The old hb-ot-layout.cc functions, `hb_ot_layout_table_choose_script` and
`hb_ot_layout_script_find_language`, are now wrappers around the new
functions `hb_ot_layout_table_select_script` and
`hb_ot_layout_script_select_language`. They are essentially the same as
the old ones plus a tag count parameter.

Closes #495.
This commit is contained in:
David Corbett 2017-12-08 11:21:14 -05:00 committed by Behdad Esfahbod
parent a03f5f4dfb
commit 91067716f5
7 changed files with 1070 additions and 764 deletions

View File

@ -497,12 +497,14 @@ hb_ot_layout_lookups_substitute_closure
hb_ot_layout_lookup_would_substitute
hb_ot_layout_script_find_language
hb_ot_layout_script_get_language_tags
hb_ot_layout_script_select_language
hb_ot_layout_table_choose_script
hb_ot_layout_table_find_feature_variations
hb_ot_layout_table_find_script
hb_ot_layout_table_get_feature_tags
hb_ot_layout_table_get_script_tags
hb_ot_layout_table_get_lookup_count
hb_ot_layout_table_select_script
hb_ot_shape_plan_collect_lookups
hb_ot_layout_language_get_required_feature_index
<SUBSECTION Private>
@ -550,11 +552,14 @@ hb_ot_math_get_glyph_assembly
<SECTION>
<FILE>hb-ot-tag</FILE>
HB_OT_MAX_TAGS_PER_LANGUAGE
HB_OT_MAX_TAGS_PER_SCRIPT
HB_OT_TAG_DEFAULT_LANGUAGE
HB_OT_TAG_DEFAULT_SCRIPT
hb_ot_tag_from_language
hb_ot_tag_to_language
hb_ot_tag_to_script
hb_ot_tags
hb_ot_tags_from_script
</SECTION>

View File

@ -367,18 +367,32 @@ hb_ot_layout_table_choose_script (hb_face_t *face,
const hb_tag_t *script_tags,
unsigned int *script_index,
hb_tag_t *chosen_script)
{
const hb_tag_t *t;
for (t = script_tags; *t; t++);
return hb_ot_layout_table_select_script (face, table_tag, t - script_tags, script_tags, script_index, chosen_script);
}
hb_bool_t
hb_ot_layout_table_select_script (hb_face_t *face,
hb_tag_t table_tag,
unsigned int script_count,
const hb_tag_t *script_tags,
unsigned int *script_index /* OUT */,
hb_tag_t *chosen_script /* OUT */)
{
static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_SCRIPT_INDEX), "");
const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag);
unsigned int i;
while (*script_tags)
for (i = 0; i < script_count; i++)
{
if (g.find_script_index (*script_tags, script_index)) {
if (g.find_script_index (script_tags[i], script_index))
{
if (chosen_script)
*chosen_script = *script_tags;
*chosen_script = script_tags[i];
return true;
}
script_tags++;
}
/* try finding 'DFLT' */
@ -463,14 +477,29 @@ hb_ot_layout_script_find_language (hb_face_t *face,
unsigned int script_index,
hb_tag_t language_tag,
unsigned int *language_index)
{
return hb_ot_layout_script_select_language (face, table_tag, script_index, 1, &language_tag, language_index);
}
hb_bool_t
hb_ot_layout_script_select_language (hb_face_t *face,
hb_tag_t table_tag,
unsigned int script_index,
unsigned int language_count,
hb_tag_t *language_tags,
unsigned int *language_index /* OUT */)
{
static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX), "");
const OT::Script &s = get_gsubgpos_table (face, table_tag).get_script (script_index);
unsigned int i;
if (s.find_lang_sys_index (language_tag, language_index))
return true;
for (i = 0; i < language_count; i++)
{
if (s.find_lang_sys_index (language_tags[i], language_index))
return true;
}
/* try with 'dflt'; MS site has had typos and many fonts use it now :( */
/* try finding 'dflt' */
if (s.find_lang_sys_index (HB_OT_TAG_DEFAULT_LANGUAGE, language_index))
return false;

View File

@ -119,6 +119,14 @@ hb_ot_layout_table_choose_script (hb_face_t *face,
unsigned int *script_index,
hb_tag_t *chosen_script);
/*
 * Counted-array form of hb_ot_layout_table_choose_script().
 *
 * The first script_count entries of script_tags are tried in array order
 * against the table identified by table_tag.  When one of them is found,
 * its index is stored through script_index, the matching tag is stored
 * through chosen_script (which may be NULL), and true is returned.
 *
 * NOTE(review): the implementation continues with a 'DFLT' lookup when no
 * tag matches; the rest of that fallback path is not visible here -- confirm
 * the return value for that case against the definition.
 */
HB_EXTERN hb_bool_t
hb_ot_layout_table_select_script (hb_face_t *face,
hb_tag_t table_tag,
unsigned int script_count,
const hb_tag_t *script_tags,
unsigned int *script_index /* OUT */,
hb_tag_t *chosen_script /* OUT */);
HB_EXTERN unsigned int
hb_ot_layout_table_get_feature_tags (hb_face_t *face,
hb_tag_t table_tag,
@ -141,6 +149,14 @@ hb_ot_layout_script_find_language (hb_face_t *face,
hb_tag_t language_tag,
unsigned int *language_index);
/*
 * Counted-array form of hb_ot_layout_script_find_language().
 *
 * The first language_count entries of language_tags are tried in array
 * order against the script at script_index in the table identified by
 * table_tag.  On the first match the language system index is stored
 * through language_index and true is returned.
 *
 * NOTE(review): when nothing matches, the implementation goes on to look
 * up 'dflt'; the tail of that path is not fully visible here -- confirm the
 * return value and the final value of *language_index against the
 * definition.
 *
 * NOTE(review): language_tags is only read by the visible implementation
 * but is not const-qualified, unlike script_tags in
 * hb_ot_layout_table_select_script(); consider const in a follow-up that
 * updates declaration and definition together.
 */
HB_EXTERN hb_bool_t
hb_ot_layout_script_select_language (hb_face_t *face,
hb_tag_t table_tag,
unsigned int script_index,
unsigned int language_count,
hb_tag_t *language_tags,
unsigned int *language_index /* OUT */);
HB_EXTERN hb_bool_t
hb_ot_layout_language_get_required_feature_index (hb_face_t *face,
hb_tag_t table_tag,

View File

@ -54,16 +54,17 @@ hb_ot_map_builder_t::hb_ot_map_builder_t (hb_face_t *face_,
/* Fetch script/language indices for GSUB/GPOS. We need these later to skip
* features not available in either table and not waste precious bits for them. */
hb_tag_t script_tags[3] = {HB_TAG_NONE, HB_TAG_NONE, HB_TAG_NONE};
hb_tag_t language_tag;
unsigned int script_count = HB_OT_MAX_TAGS_PER_SCRIPT;
unsigned int language_count = HB_OT_MAX_TAGS_PER_LANGUAGE;
hb_tag_t script_tags[HB_OT_MAX_TAGS_PER_SCRIPT];
hb_tag_t language_tags[HB_OT_MAX_TAGS_PER_LANGUAGE];
hb_ot_tags_from_script (props.script, &script_tags[0], &script_tags[1]);
language_tag = hb_ot_tag_from_language (props.language);
hb_ot_tags (props.script, props.language, &script_count, script_tags, &language_count, language_tags);
for (unsigned int table_index = 0; table_index < 2; table_index++) {
hb_tag_t table_tag = table_tags[table_index];
found_script[table_index] = (bool) hb_ot_layout_table_choose_script (face, table_tag, script_tags, &script_index[table_index], &chosen_script[table_index]);
hb_ot_layout_script_find_language (face, table_tag, script_index[table_index], language_tag, &language_index[table_index]);
found_script[table_index] = (bool) hb_ot_layout_table_select_script (face, table_tag, script_count, script_tags, &script_index[table_index], &chosen_script[table_index]);
hb_ot_layout_script_select_language (face, table_tag, script_index[table_index], language_count, language_tags, &language_index[table_index]);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -39,6 +39,17 @@ HB_BEGIN_DECLS
#define HB_OT_TAG_DEFAULT_SCRIPT HB_TAG ('D', 'F', 'L', 'T')
#define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't')
#define HB_OT_MAX_TAGS_PER_SCRIPT 2u
#define HB_OT_MAX_TAGS_PER_LANGUAGE 1u
/*
 * Converts a script and a language into arrays of OpenType script tags and
 * language tags.
 *
 * script_count / language_count are IN/OUT: on entry each holds the
 * capacity of the corresponding output array, on return the number of tags
 * produced for it.  HB_OT_MAX_TAGS_PER_SCRIPT and
 * HB_OT_MAX_TAGS_PER_LANGUAGE are the capacities used by the callers in
 * this change.
 *
 * A returned count of 0 means no specific tag applies; the default tags
 * ('DFLT' / 'dflt') are NOT written, so the caller is responsible for
 * falling back to them.
 *
 * NOTE(review): the unit tests call this with NULL for both language_count
 * and language_tags, so a NULL language pair appears to be accepted --
 * confirm against the definition.
 */
HB_EXTERN void
hb_ot_tags (hb_script_t script,
hb_language_t language,
unsigned int *script_count /* IN/OUT */,
hb_tag_t *script_tags /* OUT */,
unsigned int *language_count /* IN/OUT */,
hb_tag_t *language_tags /* OUT */);
HB_EXTERN void
hb_ot_tags_from_script (hb_script_t script,
hb_tag_t *script_tag_1,

View File

@ -50,6 +50,25 @@ test_simple_tags (const char *s, hb_script_t script)
g_assert_cmphex (hb_ot_tag_to_script (tag), ==, script);
}
/*
 * Checks the script tag that hb_ot_tags() produces for the pair
 * (script, lang_s) when given room for exactly one script tag.
 *
 * s      - expected script tag as a string; may be NULL.
 * lang_s - language string handed to hb_language_from_string(); may be NULL.
 * script - script handed to hb_ot_tags().
 *
 * If hb_ot_tags() reports no script tag (count == 0) nothing further is
 * checked; otherwise exactly one tag must be reported and it must equal the
 * tag parsed from s.
 */
static void
test_script_tags_from_language (const char *s, const char *lang_s, hb_script_t script)
{
  /* These hold OpenType tags, not scripts: hb_ot_tags() writes through an
   * hb_tag_t *, so they must be hb_tag_t rather than the hb_script_t type. */
  hb_tag_t tag;
  unsigned int count = 1;
  hb_tag_t t;

  /* Callers pass NULL for s and lang_s; never hand NULL to a %s conversion. */
  g_test_message ("Testing script %c%c%c%c: script tag %s, language tag %s",
		  HB_UNTAG (hb_script_to_iso15924_tag (script)),
		  s ? s : "(null)",
		  lang_s ? lang_s : "(null)");
  tag = hb_tag_from_string (s, -1);

  /* Only script tags are of interest here; the language outputs are omitted. */
  hb_ot_tags (script, hb_language_from_string (lang_s, -1), &count, &t, NULL, NULL);

  if (count != 0)
  {
    g_assert_cmpuint (count, ==, 1);
    g_assert_cmphex (t, ==, tag);
  }
}
static void
test_indic_tags (const char *s1, const char *s2, hb_script_t script)
{
@ -120,6 +139,26 @@ test_ot_tag_script_simple (void)
test_simple_tags ("mand", HB_SCRIPT_MANDAIC);
}
static void
test_ot_tag_script_from_language (void)
{
test_script_tags_from_language (NULL, NULL, HB_SCRIPT_INVALID);
test_script_tags_from_language (NULL, "en", HB_SCRIPT_INVALID);
test_script_tags_from_language ("copt", "en", HB_SCRIPT_COPTIC);
test_script_tags_from_language (NULL, "x-hbsc", HB_SCRIPT_INVALID);
test_script_tags_from_language ("copt", "x-hbsc", HB_SCRIPT_COPTIC);
test_script_tags_from_language ("abc ", "x-hbscabc", HB_SCRIPT_INVALID);
test_script_tags_from_language ("deva", "x-hbscdeva", HB_SCRIPT_INVALID);
test_script_tags_from_language ("dev2", "x-hbscdev2", HB_SCRIPT_INVALID);
test_script_tags_from_language ("copt", "x-hbotpap0-hbsccopt", HB_SCRIPT_INVALID);
test_script_tags_from_language (NULL, "en-x-hbsc", HB_SCRIPT_INVALID);
test_script_tags_from_language ("copt", "en-x-hbsc", HB_SCRIPT_COPTIC);
test_script_tags_from_language ("abc ", "en-x-hbscabc", HB_SCRIPT_INVALID);
test_script_tags_from_language ("deva", "en-x-hbscdeva", HB_SCRIPT_INVALID);
test_script_tags_from_language ("dev2", "en-x-hbscdev2", HB_SCRIPT_INVALID);
test_script_tags_from_language ("copt", "en-x-hbotpap0-hbsccopt", HB_SCRIPT_INVALID);
}
static void
test_ot_tag_script_indic (void)
{
@ -262,8 +301,24 @@ test_ot_tag_language (void)
test_tag_from_language ("XYZ", "xyz"); /* Unknown ISO 639-3 */
test_tag_from_language ("XYZ", "xyz-qw"); /* Unknown ISO 639-3 */
/*
* Invalid input. The precise answer does not matter, as long as it
* does not crash or get into an infinite loop.
*/
test_tag_from_language ("dflt", "-fonipa");
/*
* Tags that contain "-fonipa" as a substring but which do not contain
* the subtag "fonipa".
*/
test_tag_from_language ("ENG", "en-fonipax");
test_tag_from_language ("ENG", "en-x-fonipa");
test_tag_from_language ("ENG", "en-a-fonipa");
test_tag_from_language ("ENG", "en-a-qwe-b-fonipa");
/* International Phonetic Alphabet */
test_tag_from_language ("IPPH", "en-fonipa");
test_tag_from_language ("IPPH", "en-fonipax-fonipa");
test_tag_from_language ("IPPH", "rm-CH-fonipa-sursilv-x-foobar");
test_tag_from_language ("IPPH", "und-fonipa");
test_tag_from_language ("IPPH", "zh-fonipa");
@ -305,6 +360,55 @@ test_ot_tag_language (void)
test_tag_from_language ("ABC", "xyz-xy-x-hbotabc-zxc");
}
/*
 * Calls hb_ot_tags() for (script, lang_s) and checks the complete result.
 *
 * script_count / language_count            - capacities offered to
 *                                            hb_ot_tags() for each array.
 * expected_script_count /
 * expected_language_count                  - counts hb_ot_tags() must
 *                                            report back.
 * ...                                      - the expected tags as
 *                                            const char * strings: all
 *                                            script tags first, then all
 *                                            language tags, one argument
 *                                            per reported tag.
 */
static void
test_tags (hb_script_t script,
	   const char *lang_s,
	   unsigned int script_count,
	   unsigned int language_count,
	   unsigned int expected_script_count,
	   unsigned int expected_language_count,
	   ...)
{
  va_list expected_tags;
  unsigned int i;
  /* A capacity of 0 is a valid test input, but malloc (0) is allowed to
   * return NULL, which would trip the non-null assertions below.  Always
   * request room for at least one tag; the capacity passed to hb_ot_tags()
   * is still the caller's original value. */
  hb_tag_t *script_tags = malloc ((script_count ? script_count : 1) * sizeof (hb_tag_t));
  hb_tag_t *language_tags = malloc ((language_count ? language_count : 1) * sizeof (hb_tag_t));
  g_assert_nonnull (script_tags);
  g_assert_nonnull (language_tags);
  hb_language_t lang = hb_language_from_string (lang_s, -1);
  va_start (expected_tags, expected_language_count);

  /* On return the two counts hold the number of tags actually produced. */
  hb_ot_tags (script, lang, &script_count, script_tags, &language_count, language_tags);

  g_assert_cmpuint (script_count, ==, expected_script_count);
  g_assert_cmpuint (language_count, ==, expected_language_count);

  /* Walk the script tags, then the language tags, consuming one expected
   * string from the variable arguments per produced tag. */
  for (i = 0; i < script_count + language_count; i++)
  {
    hb_tag_t expected_tag = hb_tag_from_string (va_arg (expected_tags, const char *), -1);
    hb_tag_t actual_tag = i < script_count ? script_tags[i] : language_tags[i - script_count];
    g_assert_cmphex (actual_tag, ==, expected_tag);
  }

  free (script_tags);
  free (language_tags);
  va_end (expected_tags);
}
/*
 * End-to-end cases for hb_ot_tags(): each call gives the script, the
 * language string, the capacities of the two output arrays, the expected
 * output counts, and then the expected script tags followed by the
 * expected language tags.
 */
static void
test_ot_tag_full (void)
{
  /* Capacities used whenever a case is not exercising a smaller buffer. */
  const unsigned int max_scripts = HB_OT_MAX_TAGS_PER_SCRIPT;
  const unsigned int max_languages = HB_OT_MAX_TAGS_PER_LANGUAGE;

  /* No script: only a language tag is expected. */
  test_tags (HB_SCRIPT_INVALID, "en", max_scripts, max_languages, 0, 1, "ENG");
  /* Script and language both resolve to a single tag. */
  test_tags (HB_SCRIPT_LATIN, "en", max_scripts, max_languages, 1, 1, "latn", "ENG");
  /* Zero capacity: nothing may be reported. */
  test_tags (HB_SCRIPT_LATIN, "en", 0, 0, 0, 0);
  /* The "fonnapa" subtag is expected to select 'APPH'. */
  test_tags (HB_SCRIPT_INVALID, "en-fonnapa", max_scripts, max_languages, 0, 1, "APPH");
  /* Private-use overrides for both the language ("hbot") and the script ("hbsc"). */
  test_tags (HB_SCRIPT_INVALID, "x-hbot1234-hbsc5678", max_scripts, max_languages, 1, 1, "5678", "1234");
  /* Malayalam is expected to yield two script tags at full capacity ... */
  test_tags (HB_SCRIPT_MALAYALAM, "ml", max_scripts, max_languages, 2, 1, "mlm2", "mlym", "MLR");
  /* ... and only the first of them when there is room for one. */
  test_tags (HB_SCRIPT_MALAYALAM, "ml", 1, 1, 1, 1, "mlm2", "MLR");
  /* A three-letter language string is expected to come back upper-cased as the tag. */
  test_tags (HB_SCRIPT_INVALID, "xyz", max_scripts, max_languages, 0, 1, "XYZ");
  /* A two-letter language string with no expected tag: zero language tags. */
  test_tags (HB_SCRIPT_INVALID, "xy", max_scripts, max_languages, 0, 0);
}
int
main (int argc, char **argv)
{
@ -312,9 +416,12 @@ main (int argc, char **argv)
hb_test_add (test_ot_tag_script_degenerate);
hb_test_add (test_ot_tag_script_simple);
hb_test_add (test_ot_tag_script_from_language);
hb_test_add (test_ot_tag_script_indic);
hb_test_add (test_ot_tag_language);
hb_test_add (test_ot_tag_full);
return hb_test_run();
}