[ot-tags] Speed up hb_ot_tags_from_language()
Part of https://github.com/harfbuzz/harfbuzz/issues/3591

"After that, bulk of the time I suppose is spent in binary-searching the language table. I suggest we split the language table in 2-letter and 3-letter tags, to speed-up the vast majority of cases that are 2-letter."

benchmark-ot, before:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN        112 ns          111 ns      6286271
BM_hb_ot_tags_from_script_and_language/COMMON en_US       60.6 ns         60.4 ns     11671176
BM_hb_ot_tags_from_script_and_language/LATIN en_US        61.3 ns         61.1 ns     11442645
BM_hb_ot_tags_from_script_and_language/COMMON none        4.75 ns         4.74 ns    146997235
BM_hb_ot_tags_from_script_and_language/LATIN none         4.65 ns         4.64 ns    150938747

After:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN       89.5 ns         89.2 ns      7747649
BM_hb_ot_tags_from_script_and_language/COMMON en_US       38.5 ns         38.4 ns     18199432
BM_hb_ot_tags_from_script_and_language/LATIN en_US        39.0 ns         38.9 ns     18049238
BM_hb_ot_tags_from_script_and_language/COMMON none        4.53 ns         4.52 ns    154895110
BM_hb_ot_tags_from_script_and_language/LATIN none         4.54 ns         4.52 ns    154762105
This commit is contained in:
parent
9baccb9860
commit
dd3c858f84
|
@ -894,7 +894,6 @@ print ()
|
||||||
print ('#ifndef HB_OT_TAG_TABLE_HH')
|
print ('#ifndef HB_OT_TAG_TABLE_HH')
|
||||||
print ('#define HB_OT_TAG_TABLE_HH')
|
print ('#define HB_OT_TAG_TABLE_HH')
|
||||||
print ()
|
print ()
|
||||||
print ('static const LangTag ot_languages[] = {')
|
|
||||||
|
|
||||||
def hb_tag (tag):
|
def hb_tag (tag):
|
||||||
"""Convert a tag to ``HB_TAG`` form.
|
"""Convert a tag to ``HB_TAG`` form.
|
||||||
|
@ -944,33 +943,35 @@ def get_matching_language_name (intersection, candidates):
|
||||||
def same_tag (bcp_47_tag, ot_tags):
|
def same_tag (bcp_47_tag, ot_tags):
|
||||||
return len (bcp_47_tag) == 3 and len (ot_tags) == 1 and bcp_47_tag == ot_tags[0].lower ()
|
return len (bcp_47_tag) == 3 and len (ot_tags) == 1 and bcp_47_tag == ot_tags[0].lower ()
|
||||||
|
|
||||||
for language, tags in sorted (ot.from_bcp_47.items ()):
|
for language_len in (2, 3):
|
||||||
if language == '' or '-' in language:
|
print ('static const LangTag ot_languages%d[] = {' % language_len)
|
||||||
continue
|
for language, tags in sorted (ot.from_bcp_47.items ()):
|
||||||
commented_out = same_tag (language, tags)
|
if language == '' or '-' in language:
|
||||||
for i, tag in enumerate (tags, start=1):
|
continue
|
||||||
print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='')
|
if len(language) != language_len: continue
|
||||||
if commented_out:
|
commented_out = same_tag (language, tags)
|
||||||
print ('*/', end='')
|
for i, tag in enumerate (tags, start=1):
|
||||||
print ('\t/* ', end='')
|
print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='')
|
||||||
bcp_47_name = bcp_47.names.get (language, '')
|
if commented_out:
|
||||||
bcp_47_name_candidates = bcp_47_name.split ('\n')
|
print ('*/', end='')
|
||||||
ot_name = ot.names[tag]
|
print ('\t/* ', end='')
|
||||||
scope = bcp_47.scopes.get (language, '')
|
bcp_47_name = bcp_47.names.get (language, '')
|
||||||
if tag == DEFAULT_LANGUAGE_SYSTEM:
|
bcp_47_name_candidates = bcp_47_name.split ('\n')
|
||||||
write (f'{bcp_47_name_candidates[0]}{scope} != {ot.names[language.upper ()]}')
|
ot_name = ot.names[tag]
|
||||||
else:
|
scope = bcp_47.scopes.get (language, '')
|
||||||
intersection = language_name_intersection (bcp_47_name, ot_name)
|
if tag == DEFAULT_LANGUAGE_SYSTEM:
|
||||||
if not intersection:
|
write (f'{bcp_47_name_candidates[0]}{scope} != {ot.names[language.upper ()]}')
|
||||||
write ('%s%s -> %s' % (bcp_47_name_candidates[0], scope, ot_name))
|
|
||||||
else:
|
else:
|
||||||
name = get_matching_language_name (intersection, bcp_47_name_candidates)
|
intersection = language_name_intersection (bcp_47_name, ot_name)
|
||||||
bcp_47.names[language] = name
|
if not intersection:
|
||||||
write ('%s%s' % (name if len (name) > len (ot_name) else ot_name, scope))
|
write ('%s%s -> %s' % (bcp_47_name_candidates[0], scope, ot_name))
|
||||||
print (' */')
|
else:
|
||||||
|
name = get_matching_language_name (intersection, bcp_47_name_candidates)
|
||||||
print ('};')
|
bcp_47.names[language] = name
|
||||||
print ()
|
write ('%s%s' % (name if len (name) > len (ot_name) else ot_name, scope))
|
||||||
|
print (' */')
|
||||||
|
print ('};')
|
||||||
|
print ()
|
||||||
|
|
||||||
print ('/**')
|
print ('/**')
|
||||||
print (' * hb_ot_tags_from_complex_language:')
|
print (' * hb_ot_tags_from_complex_language:')
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -283,7 +283,21 @@ hb_ot_tags_from_language (const char *lang_str,
|
||||||
ISALPHA (s[1]))
|
ISALPHA (s[1]))
|
||||||
lang_str = s + 1;
|
lang_str = s + 1;
|
||||||
}
|
}
|
||||||
if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx))
|
const LangTag *ot_languages = nullptr;
|
||||||
|
unsigned ot_languages_len = 0;
|
||||||
|
const char *dash = strchr (lang_str, '-');
|
||||||
|
unsigned first_len = dash ? dash - lang_str : limit - lang_str;
|
||||||
|
if (first_len == 2)
|
||||||
|
{
|
||||||
|
ot_languages = ot_languages2;
|
||||||
|
ot_languages_len = ARRAY_LENGTH (ot_languages2);
|
||||||
|
}
|
||||||
|
else if (first_len == 3)
|
||||||
|
{
|
||||||
|
ot_languages = ot_languages3;
|
||||||
|
ot_languages_len = ARRAY_LENGTH (ot_languages3);
|
||||||
|
}
|
||||||
|
if (hb_sorted_array (ot_languages, ot_languages_len).bfind (lang_str, &tag_idx))
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
while (tag_idx != 0 &&
|
while (tag_idx != 0 &&
|
||||||
|
@ -291,7 +305,7 @@ hb_ot_tags_from_language (const char *lang_str,
|
||||||
tag_idx--;
|
tag_idx--;
|
||||||
for (i = 0;
|
for (i = 0;
|
||||||
i < *count &&
|
i < *count &&
|
||||||
tag_idx + i < ARRAY_LENGTH (ot_languages) &&
|
tag_idx + i < ot_languages_len &&
|
||||||
ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
|
ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
|
||||||
0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
|
0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
|
||||||
i++)
|
i++)
|
||||||
|
@ -459,9 +473,12 @@ hb_ot_tag_to_language (hb_tag_t tag)
|
||||||
return disambiguated_tag;
|
return disambiguated_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_LENGTH (ot_languages); i++)
|
for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
|
||||||
if (ot_languages[i].tag == tag)
|
if (ot_languages2[i].tag == tag)
|
||||||
return hb_language_from_string (ot_languages[i].language, -1);
|
return hb_language_from_string (ot_languages2[i].language, -1);
|
||||||
|
for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
|
||||||
|
if (ot_languages3[i].tag == tag)
|
||||||
|
return hb_language_from_string (ot_languages3[i].language, -1);
|
||||||
|
|
||||||
/* Return a custom language in the form of "x-hbot-AABBCCDD".
|
/* Return a custom language in the form of "x-hbot-AABBCCDD".
|
||||||
* If it's three letters long, also guess it's ISO 639-3 and lower-case and
|
* If it's three letters long, also guess it's ISO 639-3 and lower-case and
|
||||||
|
@ -557,13 +574,23 @@ hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
|
||||||
static inline void
|
static inline void
|
||||||
test_langs_sorted ()
|
test_langs_sorted ()
|
||||||
{
|
{
|
||||||
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
|
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages2); i++)
|
||||||
{
|
{
|
||||||
int c = ot_languages[i].cmp (&ot_languages[i - 1]);
|
int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
|
||||||
if (c > 0)
|
if (c > 0)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n",
|
fprintf (stderr, "ot_languages2 not sorted at index %d: %s %d %s\n",
|
||||||
i, ot_languages[i-1].language, c, ot_languages[i].language);
|
i, ot_languages2[i-1].language, c, ot_languages2[i].language);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages3); i++)
|
||||||
|
{
|
||||||
|
int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
|
||||||
|
if (c > 0)
|
||||||
|
{
|
||||||
|
fprintf (stderr, "ot_languages3 not sorted at index %d: %s %d %s\n",
|
||||||
|
i, ot_languages3[i-1].language, c, ot_languages3[i].language);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue