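[ot-tag] Optimize subtag_matches() more

Pass each subtag's length to subtag_matches() as an extra argument, emitted by the generator as a constant, so the function no longer calls strlen() on the subtag at run time.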

This commit is contained in:
Behdad Esfahbod 2022-05-17 17:29:39 -06:00
parent b231fc2dbc
commit 3df8017e9b
3 changed files with 59 additions and 60 deletions

View File

@ -999,7 +999,7 @@ def print_subtag_matches (subtag, new_line):
if new_line:
print ()
print ('\t&& ', end='')
print ('subtag_matches (lang_str, limit, "-%s")' % subtag, end='')
print ('subtag_matches (lang_str, limit, "-%s", %i)' % (subtag, 1 + len (subtag)), end='')
complex_tags = collections.defaultdict (list)
for initial, group in itertools.groupby ((lt_tags for lt_tags in [

View File

@ -1645,63 +1645,63 @@ hb_ot_tags_from_complex_language (const char *lang_str,
if (limit - lang_str > 5 ||
(limit - lang_str == 5 && *lang_str == '-'))
{
if (subtag_matches (lang_str, limit, "-fonnapa"))
if (subtag_matches (lang_str, limit, "-fonnapa", 8))
{
/* Undetermined; North American Phonetic Alphabet */
tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-polyton"))
if (subtag_matches (lang_str, limit, "-polyton", 8))
{
/* Modern Greek (1453-); Polytonic Greek */
tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-arevmda"))
if (subtag_matches (lang_str, limit, "-arevmda", 8))
{
/* Armenian; Western Armenian (retired code) */
tags[0] = HB_TAG('H','Y','E',' '); /* Armenian */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-provenc"))
if (subtag_matches (lang_str, limit, "-provenc", 8))
{
/* Occitan (post 1500); Provençal */
tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-fonipa"))
if (subtag_matches (lang_str, limit, "-fonipa", 7))
{
/* Undetermined; International Phonetic Alphabet */
tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-geok"))
if (subtag_matches (lang_str, limit, "-geok", 5))
{
/* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */
tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-syre"))
if (subtag_matches (lang_str, limit, "-syre", 5))
{
/* Undetermined; Syriac (Estrangelo variant) */
tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-syrj"))
if (subtag_matches (lang_str, limit, "-syrj", 5))
{
/* Undetermined; Syriac (Western variant) */
tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */
*count = 1;
return true;
}
if (subtag_matches (lang_str, limit, "-syrn"))
if (subtag_matches (lang_str, limit, "-syrn", 5))
{
/* Undetermined; Syriac (Eastern variant) */
tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */
@ -1994,7 +1994,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "do-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Min Dong Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2002,7 +2002,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "do-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Min Dong Chinese; Macao */
unsigned int i;
@ -2016,7 +2016,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "do-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Min Dong Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2024,7 +2024,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "jy-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Jinyu Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2032,7 +2032,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "jy-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Jinyu Chinese; Macao */
unsigned int i;
@ -2046,7 +2046,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "jy-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Jinyu Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2054,7 +2054,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "mn-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Mandarin Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2062,7 +2062,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "mn-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Mandarin Chinese; Macao */
unsigned int i;
@ -2076,7 +2076,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "mn-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Mandarin Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2084,7 +2084,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Northern Ping Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2092,7 +2092,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Northern Ping Chinese; Macao */
unsigned int i;
@ -2106,7 +2106,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Northern Ping Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2114,7 +2114,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "px-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Pu-Xian Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2122,7 +2122,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "px-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Pu-Xian Chinese; Macao */
unsigned int i;
@ -2136,7 +2136,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "px-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Pu-Xian Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2144,7 +2144,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sp-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Southern Ping Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2152,7 +2152,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sp-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Southern Ping Chinese; Macao */
unsigned int i;
@ -2166,7 +2166,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sp-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Southern Ping Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2174,7 +2174,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zh-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Huizhou Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2182,7 +2182,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zh-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Huizhou Chinese; Macao */
unsigned int i;
@ -2196,7 +2196,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zh-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Huizhou Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2204,7 +2204,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zo-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Min Zhong Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2212,7 +2212,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zo-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Min Zhong Chinese; Macao */
unsigned int i;
@ -2226,7 +2226,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "zo-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Min Zhong Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2277,7 +2277,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Gan Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2285,7 +2285,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Gan Chinese; Macao */
unsigned int i;
@ -2299,7 +2299,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Gan Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2377,7 +2377,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "ak-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Hakka Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2385,7 +2385,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "ak-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Hakka Chinese; Macao */
unsigned int i;
@ -2399,7 +2399,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "ak-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Hakka Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2407,7 +2407,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sn-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Xiang Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2415,7 +2415,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sn-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Xiang Chinese; Macao */
unsigned int i;
@ -2429,7 +2429,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "sn-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Xiang Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2511,7 +2511,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Min Bei Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2519,7 +2519,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Min Bei Chinese; Macao */
unsigned int i;
@ -2533,7 +2533,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "np-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Min Bei Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2541,7 +2541,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "nw-", 3)
&& subtag_matches (lang_str, limit, "-th"))
&& subtag_matches (lang_str, limit, "-th", 3))
{
/* Mon; Thailand */
tags[0] = HB_TAG('M','O','N','T'); /* Thailand Mon */
@ -2585,7 +2585,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Min Nan Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2593,7 +2593,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Min Nan Chinese; Macao */
unsigned int i;
@ -2607,7 +2607,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "an-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Min Nan Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2631,7 +2631,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
break;
case 'r':
if (0 == strncmp (&lang_str[1], "o-", 2)
&& subtag_matches (lang_str, limit, "-md"))
&& subtag_matches (lang_str, limit, "-md", 3))
{
/* Romanian; Moldova */
unsigned int i;
@ -2681,7 +2681,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "uu-", 3)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Wu Chinese; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2689,7 +2689,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "uu-", 3)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Wu Chinese; Macao */
unsigned int i;
@ -2703,7 +2703,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "uu-", 3)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Wu Chinese; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */
@ -2770,7 +2770,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "h-", 2)
&& subtag_matches (lang_str, limit, "-hk"))
&& subtag_matches (lang_str, limit, "-hk", 3))
{
/* Chinese [macrolanguage]; Hong Kong */
tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */
@ -2778,7 +2778,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "h-", 2)
&& subtag_matches (lang_str, limit, "-mo"))
&& subtag_matches (lang_str, limit, "-mo", 3))
{
/* Chinese [macrolanguage]; Macao */
unsigned int i;
@ -2792,7 +2792,7 @@ hb_ot_tags_from_complex_language (const char *lang_str,
return true;
}
if (0 == strncmp (&lang_str[1], "h-", 2)
&& subtag_matches (lang_str, limit, "-tw"))
&& subtag_matches (lang_str, limit, "-tw", 3))
{
/* Chinese [macrolanguage]; Taiwan, Province of China */
tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */

View File

@ -192,10 +192,9 @@ hb_ot_tag_to_script (hb_tag_t tag)
static inline bool
subtag_matches (const char *lang_str,
const char *limit,
const char *subtag)
const char *subtag,
unsigned subtag_len)
{
unsigned subtag_len = strlen (subtag);
if (likely ((unsigned) (limit - lang_str) < subtag_len))
return false;