From 3df8017e9b7ea2b72477294133563b4ff304a007 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 17 May 2022 17:29:39 -0600 Subject: [PATCH] [ot-tag] Optimize subtag_matches() more --- src/gen-tag-table.py | 2 +- src/hb-ot-tag-table.hh | 112 ++++++++++++++++++++--------------------- src/hb-ot-tag.cc | 5 +- 3 files changed, 59 insertions(+), 60 deletions(-) diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py index 9fc3c59fd..d8c5a2fcf 100755 --- a/src/gen-tag-table.py +++ b/src/gen-tag-table.py @@ -999,7 +999,7 @@ def print_subtag_matches (subtag, new_line): if new_line: print () print ('\t&& ', end='') - print ('subtag_matches (lang_str, limit, "-%s")' % subtag, end='') + print ('subtag_matches (lang_str, limit, "-%s", %i)' % (subtag, 1 + len (subtag)), end='') complex_tags = collections.defaultdict (list) for initial, group in itertools.groupby ((lt_tags for lt_tags in [ diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh index 341eb20ce..bcf40123a 100644 --- a/src/hb-ot-tag-table.hh +++ b/src/hb-ot-tag-table.hh @@ -1645,63 +1645,63 @@ hb_ot_tags_from_complex_language (const char *lang_str, if (limit - lang_str > 5 || (limit - lang_str == 5 && *lang_str == '-')) { - if (subtag_matches (lang_str, limit, "-fonnapa")) + if (subtag_matches (lang_str, limit, "-fonnapa", 8)) { /* Undetermined; North American Phonetic Alphabet */ tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-polyton")) + if (subtag_matches (lang_str, limit, "-polyton", 8)) { /* Modern Greek (1453-); Polytonic Greek */ tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-arevmda")) + if (subtag_matches (lang_str, limit, "-arevmda", 8)) { /* Armenian; Western Armenian (retired code) */ tags[0] = HB_TAG('H','Y','E',' '); /* Armenian */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-provenc")) + if (subtag_matches (lang_str, limit, "-provenc", 8)) { /* Occitan (post 1500); Provençal */ tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-fonipa")) + if (subtag_matches (lang_str, limit, "-fonipa", 7)) { /* Undetermined; International Phonetic Alphabet */ tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-geok")) + if (subtag_matches (lang_str, limit, "-geok", 5)) { /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */ tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-syre")) + if (subtag_matches (lang_str, limit, "-syre", 5)) { /* Undetermined; Syriac (Estrangelo variant) */ tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-syrj")) + if (subtag_matches (lang_str, limit, "-syrj", 5)) { /* Undetermined; Syriac (Western variant) */ tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */ *count = 1; return true; } - if (subtag_matches (lang_str, limit, "-syrn")) + if (subtag_matches (lang_str, limit, "-syrn", 5)) { /* Undetermined; Syriac (Eastern variant) */ tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */ @@ -1994,7 +1994,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "do-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Min Dong Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2002,7 +2002,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "do-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Min Dong Chinese; Macao */ unsigned int i; @@ -2016,7 +2016,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "do-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Min Dong Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2024,7 +2024,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "jy-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Jinyu Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2032,7 +2032,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "jy-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Jinyu Chinese; Macao */ unsigned int i; @@ -2046,7 +2046,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "jy-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Jinyu Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2054,7 +2054,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "mn-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Mandarin Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2062,7 +2062,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "mn-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Mandarin Chinese; Macao */ unsigned int i; @@ -2076,7 +2076,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "mn-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Mandarin Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2084,7 +2084,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Northern Ping Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2092,7 +2092,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Northern Ping Chinese; Macao */ unsigned int i; @@ -2106,7 +2106,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Northern Ping Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2114,7 +2114,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "px-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Pu-Xian Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2122,7 +2122,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "px-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Pu-Xian Chinese; Macao */ unsigned int i; @@ -2136,7 +2136,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "px-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Pu-Xian Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2144,7 +2144,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sp-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Southern Ping Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2152,7 +2152,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sp-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Southern Ping Chinese; Macao */ unsigned int i; @@ -2166,7 +2166,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sp-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Southern Ping Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2174,7 +2174,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zh-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Huizhou Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2182,7 +2182,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zh-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Huizhou Chinese; Macao */ unsigned int i; @@ -2196,7 +2196,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zh-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Huizhou Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2204,7 +2204,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zo-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Min Zhong Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2212,7 +2212,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zo-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Min Zhong Chinese; Macao */ unsigned int i; @@ -2226,7 +2226,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "zo-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Min Zhong Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2277,7 +2277,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Gan Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2285,7 +2285,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Gan Chinese; Macao */ unsigned int i; @@ -2299,7 +2299,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Gan Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2377,7 +2377,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "ak-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Hakka Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2385,7 +2385,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "ak-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Hakka Chinese; Macao */ unsigned int i; @@ -2399,7 +2399,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "ak-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Hakka Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2407,7 +2407,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sn-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Xiang Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2415,7 +2415,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sn-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Xiang Chinese; Macao */ unsigned int i; @@ -2429,7 +2429,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "sn-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Xiang Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2511,7 +2511,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Min Bei Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2519,7 +2519,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Min Bei Chinese; Macao */ unsigned int i; @@ -2533,7 +2533,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "np-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Min Bei Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2541,7 +2541,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "nw-", 3) - && subtag_matches (lang_str, limit, "-th")) + && subtag_matches (lang_str, limit, "-th", 3)) { /* Mon; Thailand */ tags[0] = HB_TAG('M','O','N','T'); /* Thailand Mon */ @@ -2585,7 +2585,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Min Nan Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2593,7 +2593,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Min Nan Chinese; Macao */ unsigned int i; @@ -2607,7 +2607,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "an-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Min Nan Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2631,7 +2631,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, break; case 'r': if (0 == strncmp (&lang_str[1], "o-", 2) - && subtag_matches (lang_str, limit, "-md")) + && subtag_matches (lang_str, limit, "-md", 3)) { /* Romanian; Moldova */ unsigned int i; @@ -2681,7 +2681,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "uu-", 3) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Wu Chinese; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2689,7 +2689,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "uu-", 3) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Wu Chinese; Macao */ unsigned int i; @@ -2703,7 +2703,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "uu-", 3) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Wu Chinese; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ @@ -2770,7 +2770,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "h-", 2) - && subtag_matches (lang_str, limit, "-hk")) + && subtag_matches (lang_str, limit, "-hk", 3)) { /* Chinese [macrolanguage]; Hong Kong */ tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Traditional, Hong Kong SAR */ @@ -2778,7 +2778,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "h-", 2) - && subtag_matches (lang_str, limit, "-mo")) + && subtag_matches (lang_str, limit, "-mo", 3)) { /* Chinese [macrolanguage]; Macao */ unsigned int i; @@ -2792,7 +2792,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } if (0 == strncmp (&lang_str[1], "h-", 2) - && subtag_matches (lang_str, limit, "-tw")) + && subtag_matches (lang_str, limit, "-tw", 3)) { /* Chinese [macrolanguage]; Taiwan, Province of China */ tags[0] = HB_TAG('Z','H','T',' '); /* Chinese, Traditional */ diff --git a/src/hb-ot-tag.cc b/src/hb-ot-tag.cc index 8d63eb791..31f247cf4 100644 --- a/src/hb-ot-tag.cc +++ b/src/hb-ot-tag.cc @@ -192,10 +192,9 @@ hb_ot_tag_to_script (hb_tag_t tag) static inline bool subtag_matches (const char *lang_str, const char *limit, - const char *subtag) + const char *subtag, + unsigned subtag_len) { - unsigned subtag_len = strlen (subtag); - if (likely ((unsigned) (limit - lang_str) < subtag_len)) return false;