[ot-tags] Speed up hb_ot_tags_from_language()
Part of https://github.com/harfbuzz/harfbuzz/issues/3591

"After that, bulk of the time I suppose is spent in binary-searching the
language table. I suggest we split the language table in 2-letter and
3-letter tags, to speed-up the vast majority of cases that are 2-letter."

benchmark-ot, before:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN        112 ns          111 ns      6286271
BM_hb_ot_tags_from_script_and_language/COMMON en_US       60.6 ns         60.4 ns     11671176
BM_hb_ot_tags_from_script_and_language/LATIN en_US        61.3 ns         61.1 ns     11442645
BM_hb_ot_tags_from_script_and_language/COMMON none        4.75 ns         4.74 ns    146997235
BM_hb_ot_tags_from_script_and_language/LATIN none         4.65 ns         4.64 ns    150938747

After:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN       89.5 ns         89.2 ns      7747649
BM_hb_ot_tags_from_script_and_language/COMMON en_US       38.5 ns         38.4 ns     18199432
BM_hb_ot_tags_from_script_and_language/LATIN en_US        39.0 ns         38.9 ns     18049238
BM_hb_ot_tags_from_script_and_language/COMMON none        4.53 ns         4.52 ns    154895110
BM_hb_ot_tags_from_script_and_language/LATIN none         4.54 ns         4.52 ns    154762105
This commit is contained in:
parent
9baccb9860
commit
dd3c858f84
|
@ -894,7 +894,6 @@ print ()
|
|||
print ('#ifndef HB_OT_TAG_TABLE_HH')
|
||||
print ('#define HB_OT_TAG_TABLE_HH')
|
||||
print ()
|
||||
print ('static const LangTag ot_languages[] = {')
|
||||
|
||||
def hb_tag (tag):
|
||||
"""Convert a tag to ``HB_TAG`` form.
|
||||
|
@ -944,33 +943,35 @@ def get_matching_language_name (intersection, candidates):
|
|||
def same_tag (bcp_47_tag, ot_tags):
|
||||
return len (bcp_47_tag) == 3 and len (ot_tags) == 1 and bcp_47_tag == ot_tags[0].lower ()
|
||||
|
||||
for language, tags in sorted (ot.from_bcp_47.items ()):
|
||||
if language == '' or '-' in language:
|
||||
continue
|
||||
commented_out = same_tag (language, tags)
|
||||
for i, tag in enumerate (tags, start=1):
|
||||
print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='')
|
||||
if commented_out:
|
||||
print ('*/', end='')
|
||||
print ('\t/* ', end='')
|
||||
bcp_47_name = bcp_47.names.get (language, '')
|
||||
bcp_47_name_candidates = bcp_47_name.split ('\n')
|
||||
ot_name = ot.names[tag]
|
||||
scope = bcp_47.scopes.get (language, '')
|
||||
if tag == DEFAULT_LANGUAGE_SYSTEM:
|
||||
write (f'{bcp_47_name_candidates[0]}{scope} != {ot.names[language.upper ()]}')
|
||||
else:
|
||||
intersection = language_name_intersection (bcp_47_name, ot_name)
|
||||
if not intersection:
|
||||
write ('%s%s -> %s' % (bcp_47_name_candidates[0], scope, ot_name))
|
||||
for language_len in (2, 3):
|
||||
print ('static const LangTag ot_languages%d[] = {' % language_len)
|
||||
for language, tags in sorted (ot.from_bcp_47.items ()):
|
||||
if language == '' or '-' in language:
|
||||
continue
|
||||
if len(language) != language_len: continue
|
||||
commented_out = same_tag (language, tags)
|
||||
for i, tag in enumerate (tags, start=1):
|
||||
print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='')
|
||||
if commented_out:
|
||||
print ('*/', end='')
|
||||
print ('\t/* ', end='')
|
||||
bcp_47_name = bcp_47.names.get (language, '')
|
||||
bcp_47_name_candidates = bcp_47_name.split ('\n')
|
||||
ot_name = ot.names[tag]
|
||||
scope = bcp_47.scopes.get (language, '')
|
||||
if tag == DEFAULT_LANGUAGE_SYSTEM:
|
||||
write (f'{bcp_47_name_candidates[0]}{scope} != {ot.names[language.upper ()]}')
|
||||
else:
|
||||
name = get_matching_language_name (intersection, bcp_47_name_candidates)
|
||||
bcp_47.names[language] = name
|
||||
write ('%s%s' % (name if len (name) > len (ot_name) else ot_name, scope))
|
||||
print (' */')
|
||||
|
||||
print ('};')
|
||||
print ()
|
||||
intersection = language_name_intersection (bcp_47_name, ot_name)
|
||||
if not intersection:
|
||||
write ('%s%s -> %s' % (bcp_47_name_candidates[0], scope, ot_name))
|
||||
else:
|
||||
name = get_matching_language_name (intersection, bcp_47_name_candidates)
|
||||
bcp_47.names[language] = name
|
||||
write ('%s%s' % (name if len (name) > len (ot_name) else ot_name, scope))
|
||||
print (' */')
|
||||
print ('};')
|
||||
print ()
|
||||
|
||||
print ('/**')
|
||||
print (' * hb_ot_tags_from_complex_language:')
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -283,7 +283,21 @@ hb_ot_tags_from_language (const char *lang_str,
|
|||
ISALPHA (s[1]))
|
||||
lang_str = s + 1;
|
||||
}
|
||||
if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx))
|
||||
const LangTag *ot_languages = nullptr;
|
||||
unsigned ot_languages_len = 0;
|
||||
const char *dash = strchr (lang_str, '-');
|
||||
unsigned first_len = dash ? dash - lang_str : limit - lang_str;
|
||||
if (first_len == 2)
|
||||
{
|
||||
ot_languages = ot_languages2;
|
||||
ot_languages_len = ARRAY_LENGTH (ot_languages2);
|
||||
}
|
||||
else if (first_len == 3)
|
||||
{
|
||||
ot_languages = ot_languages3;
|
||||
ot_languages_len = ARRAY_LENGTH (ot_languages3);
|
||||
}
|
||||
if (hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_str, &tag_idx))
|
||||
{
|
||||
unsigned int i;
|
||||
while (tag_idx != 0 &&
|
||||
|
@ -291,7 +305,7 @@ hb_ot_tags_from_language (const char *lang_str,
|
|||
tag_idx--;
|
||||
for (i = 0;
|
||||
i < *count &&
|
||||
tag_idx + i < ARRAY_LENGTH (ot_languages) &&
|
||||
tag_idx + i < ot_languages_len &&
|
||||
ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
|
||||
0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
|
||||
i++)
|
||||
|
@ -459,9 +473,12 @@ hb_ot_tag_to_language (hb_tag_t tag)
|
|||
return disambiguated_tag;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
|
||||
if (ot_languages[i].tag == tag)
|
||||
return hb_language_from_string (ot_languages[i].language, -1);
|
||||
for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
|
||||
if (ot_languages2[i].tag == tag)
|
||||
return hb_language_from_string (ot_languages2[i].language, -1);
|
||||
for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
|
||||
if (ot_languages3[i].tag == tag)
|
||||
return hb_language_from_string (ot_languages3[i].language, -1);
|
||||
|
||||
/* Return a custom language in the form of "x-hbot-AABBCCDD".
|
||||
* If it's three letters long, also guess it's ISO 639-3 and lower-case and
|
||||
|
@ -557,13 +574,23 @@ hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
|
|||
static inline void
|
||||
test_langs_sorted ()
|
||||
{
|
||||
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
|
||||
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++)
|
||||
{
|
||||
int c = ot_languages[i].cmp (&ot_languages[i - 1]);
|
||||
int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
|
||||
if (c > 0)
|
||||
{
|
||||
fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n",
|
||||
i, ot_languages[i-1].language, c, ot_languages[i].language);
|
||||
fprintf (stderr, "ot_languages2 not sorted at index %d: %s %d %s\n",
|
||||
i, ot_languages2[i-1].language, c, ot_languages2[i].language);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++)
|
||||
{
|
||||
int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
|
||||
if (c > 0)
|
||||
{
|
||||
fprintf (stderr, "ot_languages3 not sorted at index %d: %s %d %s\n",
|
||||
i, ot_languages3[i-1].language, c, ot_languages3[i].language);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue