[arabic-table] Use segmented table

No functional change.
This commit is contained in:
Behdad Esfahbod 2014-06-20 17:59:43 -04:00
parent c2e1134046
commit b900fa2c8c
3 changed files with 97 additions and 46 deletions

View File

@ -71,42 +71,76 @@ def print_joining_table(f):
for value,short in short_value.items():
print "#define %s %s" % (short, value)
keys = values.keys()
min_u = min(keys)
max_u = max(keys)
uu = sorted(values.keys())
num = len(values)
last = -1
ranges = []
for u in uu:
if u - last <= 1+16*3:
ranges[-1][-1] = u
else:
ranges.append([u,u])
last = u
print
print "static const uint8_t joining_table[] ="
print "{"
last_block = None
for u in range(min_u, max_u+1):
offset = 0
for start,end in ranges:
value = values.get(u, "JOINING_TYPE_X")
print
print "#define joining_offset_0x%04x %d" % (start, offset)
for u in range(start, end+1):
block = blocks.get(u, last_block)
if block != last_block:
print "\n\n /* %s */" % block
value = values.get(u, "JOINING_TYPE_X")
if block != last_block or u == start:
if u != start:
print
print "\n /* %s */" % block
last_block = block
if u % 32 != 0:
print
print " /* %04X */" % u, " " * (u % 32),
print " /* %04X */" % (u//32*32), " " * (u % 32),
if u % 32 == 0:
print
print " /* %04X */ " % u,
sys.stdout.write("%s," % short_value[value])
print
print "};"
offset += end - start + 1
print
print "#define JOINING_TABLE_FIRST 0x%04X" % min_u
print "#define JOINING_TABLE_LAST 0x%04X" % max_u
occupancy = num * 100. / offset
print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
print
occupancy = num * 100 / (max_u - min_u + 1)
# Maintain at least 40% occupancy in the table */
if occupancy < 40:
raise Exception ("Table too sparse, please investigate: ", occupancy)
page_bits = 8
print
print "static unsigned int"
print "joining_type (hb_codepoint_t u)"
print "{"
print " switch (u >> %d)" % page_bits
print " {"
pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]])
for p in sorted(pages):
print " case 0x%0X:" % p
for (start,end) in ranges:
if p not in [start>>page_bits, end>>page_bits]: continue
offset = "joining_offset_0x%04x" % start
print " if (0x%04X <= u && u <= 0x%04X) return joining_table[u - 0x%04X + %s];" % (start, end, start, offset)
print " break;"
print ""
print " default:"
print " break;"
print " }"
print " return X;"
print "}"
print
for value,short in short_value.items():
print "#undef %s" % (short)
print

View File

@ -28,6 +28,7 @@
static const uint8_t joining_table[] =
{
#define joining_offset_0x0600 0
/* Arabic */
@ -48,37 +49,55 @@ static const uint8_t joining_table[] =
/* Arabic Supplement */
/* 0750 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
/* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
/* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
/* Thaana */
/* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
#define joining_offset_0x07ca 384
/* NKo */
/* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X,
/* 07C0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,
/* Samaritan */
/* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
#define joining_offset_0x0840 433
/* Mandaic */
/* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X,
/* 0860 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,
#define joining_offset_0x08a0 458
/* Arabic Extended-A */
/* 08A0 */ D,X,D,D,D,D,D,D,D,D,R,R,R,
};
#define JOINING_TABLE_FIRST 0x0600
#define JOINING_TABLE_LAST 0x08AC
}; /* Table items: 471; occupancy: 66% */
/* Look up the joining_table entry for codepoint u.
 *
 * joining_table stores several codepoint ranges back to back.  For a range
 * beginning at codepoint S, the entry for u is at index
 * (u - S) + joining_offset_<S>; the joining_offset_* macros are #defined
 * inline with the table data and give the index of that range's first entry.
 *
 * Returns X when u is not inside any of the ranges tested below.
 * NOTE(review): X is a short macro that is #undef'd right after this
 * function; presumably it stands for JOINING_TYPE_X -- confirm against the
 * #define block above the table. */
static unsigned int
joining_type (hb_codepoint_t u)
{
/* Dispatch on u >> 8 (the 256-codepoint page), so each case only tests the
 * ranges that touch that page. */
switch (u >> 8)
{
case 0x6:
if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
break;
case 0x7:
/* The 0x0600..0x077F range crosses from page 0x6 into page 0x7, so its
 * check is repeated here. */
if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
if (0x07CA <= u && u <= 0x07FA) return joining_table[u - 0x07CA + joining_offset_0x07ca];
break;
case 0x8:
if (0x0840 <= u && u <= 0x0858) return joining_table[u - 0x0840 + joining_offset_0x0840];
if (0x08A0 <= u && u <= 0x08AC) return joining_table[u - 0x08A0 + joining_offset_0x08a0];
break;
default:
break;
}
/* Reached when u is on an unlisted page, or on a listed page but outside
 * every range checked for it. */
return X;
}
#undef X
#undef R

View File

@ -57,11 +57,9 @@ enum {
static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
{
if (likely (hb_in_range<hb_codepoint_t> (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) {
unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
unsigned int j_type = joining_type(u);
if (likely (j_type != JOINING_TYPE_X))
return j_type;
}
/* Mongolian joining data is not in ArabicJoining.txt yet. */
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF)))