[ot-tags] Speed up hb_ot_tags_from_complex_language()
Part of https://github.com/harfbuzz/harfbuzz/issues/3591

2. All the subtag_matches outside the switch match long strings (>= 6 or so). As such, check the tag for such length before going into any of them.

benchmark-ot, before:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN        172 ns          171 ns      4083155
BM_hb_ot_tags_from_script_and_language/COMMON en_US        120 ns          119 ns      5849947
BM_hb_ot_tags_from_script_and_language/LATIN en_US         113 ns          112 ns      5840326
BM_hb_ot_tags_from_script_and_language/COMMON none        4.66 ns         4.64 ns    151396224
BM_hb_ot_tags_from_script_and_language/LATIN none         4.66 ns         4.64 ns    149019593

After:

----------------------------------------------------------------------------------------------
Benchmark                                                    Time             CPU   Iterations
----------------------------------------------------------------------------------------------
BM_hb_ot_tags_from_script_and_language/COMMON zh_CN        112 ns          112 ns      6357763
BM_hb_ot_tags_from_script_and_language/COMMON en_US       60.5 ns         60.3 ns     11475091
BM_hb_ot_tags_from_script_and_language/LATIN en_US        54.9 ns         54.8 ns     12575690
BM_hb_ot_tags_from_script_and_language/COMMON none        4.61 ns         4.59 ns    152388450
BM_hb_ot_tags_from_script_and_language/LATIN none         4.66 ns         4.64 ns    151497600
This commit is contained in:
parent
26d906b88b
commit
9baccb9860
|
@ -1009,6 +1009,24 @@ for initial, group in itertools.groupby ((lt_tags for lt_tags in [
|
|||
key=lambda lt_tags: lt_tags[0].get_group ()):
|
||||
complex_tags[initial] += group
|
||||
|
||||
# Calculate the min length of the subtags outside the switch
|
||||
min_subtag_len = 100
|
||||
for initial, items in sorted (complex_tags.items ()):
|
||||
if initial != 'und':
|
||||
continue
|
||||
for lt, tags in items:
|
||||
if not tags:
|
||||
continue
|
||||
subtag_len = 0
|
||||
subtag_len += len(lt.script) if lt.script is not None else 0
|
||||
subtag_len += len(lt.region) if lt.region is not None else 0
|
||||
subtag_len += len(lt.variant) if lt.variant is not None else 0
|
||||
min_subtag_len = min(subtag_len, min_subtag_len)
|
||||
min_subtag_len += 1 # For initial '-'
|
||||
|
||||
print (' if (limit - lang_str > %d ||' % min_subtag_len)
|
||||
print (" (limit - lang_str == %d && *lang_str == '-'))" % min_subtag_len)
|
||||
print (' {')
|
||||
for initial, items in sorted (complex_tags.items ()):
|
||||
if initial != 'und':
|
||||
continue
|
||||
|
@ -1018,29 +1036,30 @@ for initial, items in sorted (complex_tags.items ()):
|
|||
if lt.variant in bcp_47.prefixes:
|
||||
expect (next (iter (bcp_47.prefixes[lt.variant])) == lt.language,
|
||||
'%s is not a valid prefix of %s' % (lt.language, lt.variant))
|
||||
print (' if (', end='')
|
||||
print (' if (', end='')
|
||||
print_subtag_matches (lt.script, False)
|
||||
print_subtag_matches (lt.region, False)
|
||||
print_subtag_matches (lt.variant, False)
|
||||
print (')')
|
||||
print (' {')
|
||||
write (' /* %s */' % bcp_47.get_name (lt))
|
||||
print (' {')
|
||||
write (' /* %s */' % bcp_47.get_name (lt))
|
||||
print ()
|
||||
if len (tags) == 1:
|
||||
write (' tags[0] = %s; /* %s */' % (hb_tag (tags[0]), ot.names[tags[0]]))
|
||||
write (' tags[0] = %s; /* %s */' % (hb_tag (tags[0]), ot.names[tags[0]]))
|
||||
print ()
|
||||
print (' *count = 1;')
|
||||
print (' *count = 1;')
|
||||
else:
|
||||
print (' hb_tag_t possible_tags[] = {')
|
||||
for tag in tags:
|
||||
write (' %s, /* %s */' % (hb_tag (tag), ot.names[tag]))
|
||||
print ()
|
||||
print (' };')
|
||||
print (' for (i = 0; i < %s && i < *count; i++)' % len (tags))
|
||||
print (' tags[i] = possible_tags[i];')
|
||||
print (' *count = i;')
|
||||
print (' return true;')
|
||||
print (' }')
|
||||
print (' };')
|
||||
print (' for (i = 0; i < %s && i < *count; i++)' % len (tags))
|
||||
print (' tags[i] = possible_tags[i];')
|
||||
print (' *count = i;')
|
||||
print (' return true;')
|
||||
print (' }')
|
||||
print (' }')
|
||||
|
||||
print (' switch (lang_str[0])')
|
||||
print (' {')
|
||||
|
|
|
@ -1639,68 +1639,72 @@ hb_ot_tags_from_complex_language (const char *lang_str,
|
|||
unsigned int *count /* IN/OUT */,
|
||||
hb_tag_t *tags /* OUT */)
|
||||
{
|
||||
if (subtag_matches (lang_str, limit, "-fonnapa"))
|
||||
if (limit - lang_str > 5 ||
|
||||
(limit - lang_str == 5 && *lang_str == '-'))
|
||||
{
|
||||
/* Undetermined; North American Phonetic Alphabet */
|
||||
tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-polyton"))
|
||||
{
|
||||
/* Modern Greek (1453-); Polytonic Greek */
|
||||
tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-arevmda"))
|
||||
{
|
||||
/* Armenian; Western Armenian (retired code) */
|
||||
tags[0] = HB_TAG('H','Y','E',' '); /* Armenian */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-provenc"))
|
||||
{
|
||||
/* Occitan (post 1500); Provençal */
|
||||
tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-fonipa"))
|
||||
{
|
||||
/* Undetermined; International Phonetic Alphabet */
|
||||
tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-geok"))
|
||||
{
|
||||
/* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */
|
||||
tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syre"))
|
||||
{
|
||||
/* Undetermined; Syriac (Estrangelo variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syrj"))
|
||||
{
|
||||
/* Undetermined; Syriac (Western variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syrn"))
|
||||
{
|
||||
/* Undetermined; Syriac (Eastern variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */
|
||||
*count = 1;
|
||||
return true;
|
||||
if (subtag_matches (lang_str, limit, "-fonnapa"))
|
||||
{
|
||||
/* Undetermined; North American Phonetic Alphabet */
|
||||
tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-polyton"))
|
||||
{
|
||||
/* Modern Greek (1453-); Polytonic Greek */
|
||||
tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-arevmda"))
|
||||
{
|
||||
/* Armenian; Western Armenian (retired code) */
|
||||
tags[0] = HB_TAG('H','Y','E',' '); /* Armenian */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-provenc"))
|
||||
{
|
||||
/* Occitan (post 1500); Provençal */
|
||||
tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-fonipa"))
|
||||
{
|
||||
/* Undetermined; International Phonetic Alphabet */
|
||||
tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-geok"))
|
||||
{
|
||||
/* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */
|
||||
tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syre"))
|
||||
{
|
||||
/* Undetermined; Syriac (Estrangelo variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syrj"))
|
||||
{
|
||||
/* Undetermined; Syriac (Western variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
if (subtag_matches (lang_str, limit, "-syrn"))
|
||||
{
|
||||
/* Undetermined; Syriac (Eastern variant) */
|
||||
tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */
|
||||
*count = 1;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
switch (lang_str[0])
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue