From b900fa2c8cc088dbcbdbf90bfdf8764f9ee1c96a Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 20 Jun 2014 17:59:43 -0400 Subject: [PATCH] [arabic-table] Use segmented table No functional change. --- src/gen-arabic-table.py | 82 +++++++++++++++++-------- src/hb-ot-shape-complex-arabic-table.hh | 53 +++++++++++----- src/hb-ot-shape-complex-arabic.cc | 8 +-- 3 files changed, 97 insertions(+), 46 deletions(-) diff --git a/src/gen-arabic-table.py b/src/gen-arabic-table.py index 6f2c9d507..1596126c0 100755 --- a/src/gen-arabic-table.py +++ b/src/gen-arabic-table.py @@ -71,42 +71,76 @@ def print_joining_table(f): for value,short in short_value.items(): print "#define %s %s" % (short, value) - keys = values.keys() - min_u = min(keys) - max_u = max(keys) + uu = sorted(values.keys()) num = len(values) + + last = -1 + ranges = [] + for u in uu: + if u - last <= 1+16*3: + ranges[-1][-1] = u + else: + ranges.append([u,u]) + last = u + print print "static const uint8_t joining_table[] =" print "{" last_block = None - for u in range(min_u, max_u+1): + offset = 0 + for start,end in ranges: - value = values.get(u, "JOINING_TYPE_X") + print + print "#define joining_offset_0x%04x %d" % (start, offset) - block = blocks.get(u, last_block) - if block != last_block: - print "\n\n /* %s */" % block - last_block = block - if u % 32 != 0: + for u in range(start, end+1): + + block = blocks.get(u, last_block) + value = values.get(u, "JOINING_TYPE_X") + + if block != last_block or u == start: + if u != start: + print + print "\n /* %s */" % block + last_block = block + if u % 32 != 0: + print + print " /* %04X */" % (u//32*32), " " * (u % 32), + + if u % 32 == 0: print - print " /* %04X */" % u, " " * (u % 32), + print " /* %04X */ " % u, + sys.stdout.write("%s," % short_value[value]) + print - if u % 32 == 0: - print - print " /* %04X */ " % u, - sys.stdout.write("%s," % short_value[value]) + offset += end - start + 1 print - print "};" - print - print "#define JOINING_TABLE_FIRST 0x%04X" % min_u - print "#define JOINING_TABLE_LAST 0x%04X" % max_u + occupancy = num * 100. / offset + print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy) print - occupancy = num * 100 / (max_u - min_u + 1) - # Maintain at least 40% occupancy in the table */ - if occupancy < 40: - raise Exception ("Table too sparse, please investigate: ", occupancy) - + page_bits = 8 + print + print "static unsigned int" + print "joining_type (hb_codepoint_t u)" + print "{" + print " switch (u >> %d)" % page_bits + print " {" + pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]]) + for p in sorted(pages): + print " case 0x%0X:" % p + for (start,end) in ranges: + if p not in [start>>page_bits, end>>page_bits]: continue + offset = "joining_offset_0x%04x" % start + print " if (0x%04X <= u && u <= 0x%04X) return joining_table[u - 0x%04X + %s];" % (start, end, start, offset) + print " break;" + print "" + print " default:" + print " break;" + print " }" + print " return X;" + print "}" + print for value,short in short_value.items(): print "#undef %s" % (short) print diff --git a/src/hb-ot-shape-complex-arabic-table.hh b/src/hb-ot-shape-complex-arabic-table.hh index ad119dafc..877625a67 100644 --- a/src/hb-ot-shape-complex-arabic-table.hh +++ b/src/hb-ot-shape-complex-arabic-table.hh @@ -28,6 +28,7 @@ static const uint8_t joining_table[] = { +#define joining_offset_0x0600 0 /* Arabic */ @@ -48,37 +49,55 @@ static const uint8_t joining_table[] = /* Arabic Supplement */ - /* 0750 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, + /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, - /* Thaana */ - - /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, +#define joining_offset_0x07ca 384 /* NKo */ - /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X, + /* 07C0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C, - /* Samaritan */ - - /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, +#define joining_offset_0x0840 433 /* Mandaic */ - /* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X, - /* 0860 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U, + +#define joining_offset_0x08a0 458 /* Arabic Extended-A */ /* 08A0 */ D,X,D,D,D,D,D,D,D,D,R,R,R, -}; -#define JOINING_TABLE_FIRST 0x0600 -#define JOINING_TABLE_LAST 0x08AC +}; /* Table items: 471; occupancy: 66% */ + + +static unsigned int +joining_type (hb_codepoint_t u) +{ + switch (u >> 8) + { + case 0x6: + if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600]; + break; + + case 0x7: + if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600]; + if (0x07CA <= u && u <= 0x07FA) return joining_table[u - 0x07CA + joining_offset_0x07ca]; + break; + + case 0x8: + if (0x0840 <= u && u <= 0x0858) return joining_table[u - 0x0840 + joining_offset_0x0840]; + if (0x08A0 <= u && u <= 0x08AC) return joining_table[u - 0x08A0 + joining_offset_0x08a0]; + break; + + default: + break; + } + return X; +} #undef X #undef R diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index ea6d85c1e..61a55efe9 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -57,11 +57,9 @@ enum { static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat) { - if (likely (hb_in_range (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) { - unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST]; - if (likely (j_type != JOINING_TYPE_X)) - return j_type; - } + unsigned int j_type = joining_type(u); + if (likely (j_type != JOINING_TYPE_X)) + return j_type; /* Mongolian joining data is not in ArabicJoining.txt yet. */ if (unlikely (hb_in_range (u, 0x1800, 0x18AF)))