From f5d619be79e9f23f67f23513e60c546fc498f1b8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Wed, 18 May 2022 11:04:52 -0600 Subject: [PATCH] [ot-tags] Further gate the slow complex case, and add more tests Part of https://github.com/harfbuzz/harfbuzz/issues/3591 Still 'zh-trad' is the slowest case. -------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------------- BM_hb_ot_tags_from_script_and_language/COMMON zh_trad 136 ns 136 ns 5107838 BM_hb_ot_tags_from_script_and_language/COMMON ab_abcd 115 ns 115 ns 6103104 BM_hb_ot_tags_from_script_and_language/COMMON ab_abc 25.4 ns 25.3 ns 27674482 BM_hb_ot_tags_from_script_and_language/COMMON abcdef_XY 20.2 ns 20.1 ns 34795719 BM_hb_ot_tags_from_script_and_language/COMMON abcd_XY 19.4 ns 19.3 ns 36390401 BM_hb_ot_tags_from_script_and_language/COMMON cxy_CN 33.5 ns 33.4 ns 20998939 BM_hb_ot_tags_from_script_and_language/COMMON exy_CN 25.1 ns 25.0 ns 27705832 BM_hb_ot_tags_from_script_and_language/COMMON zh_CN 34.2 ns 34.1 ns 20564356 BM_hb_ot_tags_from_script_and_language/COMMON en_US 15.5 ns 15.5 ns 45032204 BM_hb_ot_tags_from_script_and_language/LATIN en_US 15.9 ns 15.8 ns 44412379 BM_hb_ot_tags_from_script_and_language/COMMON none 4.72 ns 4.71 ns 149101665 BM_hb_ot_tags_from_script_and_language/LATIN none 4.72 ns 4.70 ns 149254498 --- perf/benchmark-ot.cc | 3 +++ src/gen-tag-table.py | 3 +++ src/hb-ot-tag-table.hh | 3 +++ 3 files changed, 9 insertions(+) diff --git a/perf/benchmark-ot.cc b/perf/benchmark-ot.cc index c9edef303..7b122d40f 100644 --- a/perf/benchmark-ot.cc +++ b/perf/benchmark-ot.cc @@ -27,6 +27,9 @@ static void BM_hb_ot_tags_from_script_and_language (benchmark::State& state, language_tags /* OUT */); } } +BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON zh_trad, HB_SCRIPT_COMMON, "zh_trad"); +BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON ab_abcd, HB_SCRIPT_COMMON, "ab_abcd"); +BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON ab_abc, HB_SCRIPT_COMMON, "ab_abc"); BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON abcdef_XY, HB_SCRIPT_COMMON, "abcdef_XY"); BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON abcd_XY, HB_SCRIPT_COMMON, "abcd_XY"); BENCHMARK_CAPTURE (BM_hb_ot_tags_from_script_and_language, COMMON cxy_CN, HB_SCRIPT_COMMON, "cxy_CN"); diff --git a/src/gen-tag-table.py b/src/gen-tag-table.py index d8c5a2fcf..16b6818fa 100755 --- a/src/gen-tag-table.py +++ b/src/gen-tag-table.py @@ -1028,6 +1028,8 @@ min_subtag_len += 1 # For initial '-' print (' if (limit - lang_str > %d ||' % min_subtag_len) print (" (limit - lang_str == %d && *lang_str == '-'))" % min_subtag_len) print (' {') +print (" const char *p = strchr (lang_str, '-');") +print (" if (!p || p >= limit || limit - p < %i) goto out;" % min_subtag_len) for initial, items in sorted (complex_tags.items ()): if initial != 'und': continue @@ -1061,6 +1063,7 @@ for initial, items in sorted (complex_tags.items ()): print (' return true;') print (' }') print (' }') +print ('out:') print (' switch (lang_str[0])') print (' {') diff --git a/src/hb-ot-tag-table.hh b/src/hb-ot-tag-table.hh index bcf40123a..c2a548704 100644 --- a/src/hb-ot-tag-table.hh +++ b/src/hb-ot-tag-table.hh @@ -1645,6 +1645,8 @@ hb_ot_tags_from_complex_language (const char *lang_str, if (limit - lang_str > 5 || (limit - lang_str == 5 && *lang_str == '-')) { + const char *p = strchr (lang_str, '-'); + if (!p || p >= limit || limit - p < 5) goto out; if (subtag_matches (lang_str, limit, "-fonnapa", 8)) { /* Undetermined; North American Phonetic Alphabet */ @@ -1709,6 +1711,7 @@ hb_ot_tags_from_complex_language (const char *lang_str, return true; } } +out: switch (lang_str[0]) { case 'a':