[ot-tags] Further speed up language bsearch()

Use an integer tag as the bsearch() key instead of a string.

Part of: https://github.com/harfbuzz/harfbuzz/issues/3591

Before:
------------------------------------------------------------------------------------------------
Benchmark                                                      Time             CPU   Iterations
------------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON abcd_XY       8.11 ns         8.08 ns     87067795
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN         53.6 ns         53.5 ns     13042418
BM_hb_ot_tags_from_script_and_language/COMMON en_US         24.2 ns         24.1 ns     29052731
BM_hb_ot_tags_from_script_and_language/LATIN en_US          24.4 ns         24.3 ns     28736769
BM_hb_ot_tags_from_script_and_language/COMMON none          4.43 ns         4.41 ns    160370413
BM_hb_ot_tags_from_script_and_language/LATIN none           4.35 ns         4.34 ns    160578191

After:
------------------------------------------------------------------------------------------------
Benchmark                                                      Time             CPU   Iterations
------------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON abcd_XY       7.97 ns         7.95 ns     85208363
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN         41.7 ns         41.6 ns     16945817
BM_hb_ot_tags_from_script_and_language/COMMON en_US         16.1 ns         16.0 ns     43613523
BM_hb_ot_tags_from_script_and_language/LATIN en_US          16.5 ns         16.4 ns     42568107
BM_hb_ot_tags_from_script_and_language/COMMON none          4.30 ns         4.29 ns    164055469
BM_hb_ot_tags_from_script_and_language/LATIN none           4.29 ns         4.27 ns    163793591
This commit is contained in:
Behdad Esfahbod 2022-05-17 15:51:41 -06:00
parent c460cf74ce
commit 909f00ac6e
3 changed files with 1622 additions and 1615 deletions

View File

@ -951,7 +951,7 @@ for language_len in (2, 3):
if len(language) != language_len: continue if len(language) != language_len: continue
commented_out = same_tag (language, tags) commented_out = same_tag (language, tags)
for i, tag in enumerate (tags, start=1): for i, tag in enumerate (tags, start=1):
print ('%s{\"%s\",\t%s},' % ('/*' if commented_out else ' ', language, hb_tag (tag)), end='') print ('%s{%s,\t%s},' % ('/*' if commented_out else ' ', hb_tag (language), hb_tag (tag)), end='')
if commented_out: if commented_out:
print ('*/', end='') print ('*/', end='')
print ('\t/* ', end='') print ('\t/* ', end='')

File diff suppressed because it is too large Load Diff

View File

@ -216,15 +216,15 @@ lang_matches (const char *lang_str, const char *limit, const char *spec, unsigne
struct LangTag struct LangTag
{ {
char language[4]; hb_tag_t language;
hb_tag_t tag; hb_tag_t tag;
int cmp (const char *a, unsigned len) const int cmp (hb_tag_t a) const
{ {
return strncmp (a, this->language, len); return a < this->language ? -1 : a > this->language ? +1 : 0;
} }
int cmp (const LangTag *that) const int cmp (const LangTag *that) const
{ return cmp (that->language, strlen (that->language)); } { return cmp (that->language); }
}; };
#include "hb-ot-tag-table.hh" #include "hb-ot-tag-table.hh"
@ -288,17 +288,17 @@ hb_ot_tags_from_language (const char *lang_str,
ot_languages = ot_languages3; ot_languages = ot_languages3;
ot_languages_len = ARRAY_LENGTH (ot_languages3); ot_languages_len = ARRAY_LENGTH (ot_languages3);
} }
if (hb_sorted_array (ot_languages, ot_languages_len).bsearch_impl (lang_str, &tag_idx, first_len)) if (hb_sorted_array (ot_languages, ot_languages_len).bfind (hb_tag_from_string (lang_str, first_len), &tag_idx))
{ {
unsigned int i; unsigned int i;
while (tag_idx != 0 && while (tag_idx != 0 &&
0 == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - 1].language)) ot_languages[tag_idx].language == ot_languages[tag_idx - 1].language)
tag_idx--; tag_idx--;
for (i = 0; for (i = 0;
i < *count && i < *count &&
tag_idx + i < ot_languages_len && tag_idx + i < ot_languages_len &&
ot_languages[tag_idx + i].tag != HB_TAG_NONE && ot_languages[tag_idx + i].tag != HB_TAG_NONE &&
0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language); ot_languages[tag_idx + i].language == ot_languages[tag_idx].language;
i++) i++)
tags[i] = ot_languages[tag_idx + i].tag; tags[i] = ot_languages[tag_idx + i].tag;
*count = i; *count = i;
@ -464,12 +464,19 @@ hb_ot_tag_to_language (hb_tag_t tag)
return disambiguated_tag; return disambiguated_tag;
} }
char buf[4];
for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++) for (i = 0; i < ARRAY_LENGTH (ot_languages2); i++)
if (ot_languages2[i].tag == tag) if (ot_languages2[i].tag == tag)
return hb_language_from_string (ot_languages2[i].language, -1); {
hb_tag_to_string (ot_languages2[i].language, buf);
return hb_language_from_string (buf, 2);
}
for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++) for (i = 0; i < ARRAY_LENGTH (ot_languages3); i++)
if (ot_languages3[i].tag == tag) if (ot_languages3[i].tag == tag)
return hb_language_from_string (ot_languages3[i].language, -1); {
hb_tag_to_string (ot_languages3[i].language, buf);
return hb_language_from_string (buf, 3);
}
/* Return a custom language in the form of "x-hbot-AABBCCDD". /* Return a custom language in the form of "x-hbot-AABBCCDD".
* If it's three letters long, also guess it's ISO 639-3 and lower-case and * If it's three letters long, also guess it's ISO 639-3 and lower-case and
@ -570,7 +577,7 @@ test_langs_sorted ()
int c = ot_languages2[i].cmp (&ot_languages2[i - 1]); int c = ot_languages2[i].cmp (&ot_languages2[i - 1]);
if (c > 0) if (c > 0)
{ {
fprintf (stderr, "ot_languages2 not sorted at index %d: %s %d %s\n", fprintf (stderr, "ot_languages2 not sorted at index %d: %08x %d %08x\n",
i, ot_languages2[i-1].language, c, ot_languages2[i].language); i, ot_languages2[i-1].language, c, ot_languages2[i].language);
abort(); abort();
} }
@ -580,7 +587,7 @@ test_langs_sorted ()
int c = ot_languages3[i].cmp (&ot_languages3[i - 1]); int c = ot_languages3[i].cmp (&ot_languages3[i - 1]);
if (c > 0) if (c > 0)
{ {
fprintf (stderr, "ot_languages3 not sorted at index %d: %s %d %s\n", fprintf (stderr, "ot_languages3 not sorted at index %d: %08x %d %08x\n",
i, ot_languages3[i-1].language, c, ot_languages3[i].language); i, ot_languages3[i-1].language, c, ot_languages3[i].language);
abort(); abort();
} }