[arabic-table] Use segmented table

No functional change.
This commit is contained in:
Behdad Esfahbod 2014-06-20 17:59:43 -04:00
parent c2e1134046
commit b900fa2c8c
3 changed files with 97 additions and 46 deletions

View File

@ -71,42 +71,76 @@ def print_joining_table(f):
for value,short in short_value.items(): for value,short in short_value.items():
print "#define %s %s" % (short, value) print "#define %s %s" % (short, value)
keys = values.keys() uu = sorted(values.keys())
min_u = min(keys)
max_u = max(keys)
num = len(values) num = len(values)
last = -1
ranges = []
for u in uu:
if u - last <= 1+16*3:
ranges[-1][-1] = u
else:
ranges.append([u,u])
last = u
print print
print "static const uint8_t joining_table[] =" print "static const uint8_t joining_table[] ="
print "{" print "{"
last_block = None last_block = None
for u in range(min_u, max_u+1): offset = 0
for start,end in ranges:
value = values.get(u, "JOINING_TYPE_X") print
print "#define joining_offset_0x%04x %d" % (start, offset)
for u in range(start, end+1):
block = blocks.get(u, last_block) block = blocks.get(u, last_block)
if block != last_block: value = values.get(u, "JOINING_TYPE_X")
print "\n\n /* %s */" % block
if block != last_block or u == start:
if u != start:
print
print "\n /* %s */" % block
last_block = block last_block = block
if u % 32 != 0: if u % 32 != 0:
print print
print " /* %04X */" % u, " " * (u % 32), print " /* %04X */" % (u//32*32), " " * (u % 32),
if u % 32 == 0: if u % 32 == 0:
print print
print " /* %04X */ " % u, print " /* %04X */ " % u,
sys.stdout.write("%s," % short_value[value]) sys.stdout.write("%s," % short_value[value])
print print
print "};"
offset += end - start + 1
print print
print "#define JOINING_TABLE_FIRST 0x%04X" % min_u occupancy = num * 100. / offset
print "#define JOINING_TABLE_LAST 0x%04X" % max_u print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
print print
occupancy = num * 100 / (max_u - min_u + 1) page_bits = 8
# Maintain at least 40% occupancy in the table */ print
if occupancy < 40: print "static unsigned int"
raise Exception ("Table too sparse, please investigate: ", occupancy) print "joining_type (hb_codepoint_t u)"
print "{"
print " switch (u >> %d)" % page_bits
print " {"
pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]])
for p in sorted(pages):
print " case 0x%0X:" % p
for (start,end) in ranges:
if p not in [start>>page_bits, end>>page_bits]: continue
offset = "joining_offset_0x%04x" % start
print " if (0x%04X <= u && u <= 0x%04X) return joining_table[u - 0x%04X + %s];" % (start, end, start, offset)
print " break;"
print ""
print " default:"
print " break;"
print " }"
print " return X;"
print "}"
print
for value,short in short_value.items(): for value,short in short_value.items():
print "#undef %s" % (short) print "#undef %s" % (short)
print print

View File

@ -28,6 +28,7 @@
static const uint8_t joining_table[] = static const uint8_t joining_table[] =
{ {
#define joining_offset_0x0600 0
/* Arabic */ /* Arabic */
@ -48,37 +49,55 @@ static const uint8_t joining_table[] =
/* Arabic Supplement */ /* Arabic Supplement */
/* 0750 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
/* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
/* Thaana */ #define joining_offset_0x07ca 384
/* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* NKo */ /* NKo */
/* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, /* 07C0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X, /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,
/* Samaritan */ #define joining_offset_0x0840 433
/* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Mandaic */ /* Mandaic */
/* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X, /* 0840 */ R,D,D,D,D,D,R,D,D,R,D,D,D,D,D,R,D,D,D,D,R,D,U,U,U,
/* 0860 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, #define joining_offset_0x08a0 458
/* Arabic Extended-A */ /* Arabic Extended-A */
/* 08A0 */ D,X,D,D,D,D,D,D,D,D,R,R,R, /* 08A0 */ D,X,D,D,D,D,D,D,D,D,R,R,R,
};
#define JOINING_TABLE_FIRST 0x0600 }; /* Table items: 471; occupancy: 66% */
#define JOINING_TABLE_LAST 0x08AC
/* Return the joining_table entry for code point `u`, or X when `u` lies
 * outside every range stored in the table (X is presumably the short alias
 * of JOINING_TYPE_X -- confirm against the #define block of this file).
 *
 * joining_table stores several disjoint code-point ranges back to back.
 * Each joining_offset_0xNNNN macro is the index within joining_table at
 * which the range starting at 0xNNNN begins, so `u - start + offset`
 * addresses the entry for `u`. */
static unsigned int
joining_type (hb_codepoint_t u)
{
/* Dispatch on the 256-code-point page (u >> 8) so that only the ranges
 * touching that page are range-checked. */
switch (u >> 8)
{
case 0x6:
if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
break;
case 0x7:
/* The 0x0600..0x077F range spans pages 0x6 and 0x7, so it is tested
 * under both cases. */
if (0x0600 <= u && u <= 0x077F) return joining_table[u - 0x0600 + joining_offset_0x0600];
if (0x07CA <= u && u <= 0x07FA) return joining_table[u - 0x07CA + joining_offset_0x07ca];
break;
case 0x8:
if (0x0840 <= u && u <= 0x0858) return joining_table[u - 0x0840 + joining_offset_0x0840];
if (0x08A0 <= u && u <= 0x08AC) return joining_table[u - 0x08A0 + joining_offset_0x08a0];
break;
default:
break;
}
/* `u` is on a known page but in a gap between ranges, or on a page with
 * no ranges at all. */
return X;
}
#undef X #undef X
#undef R #undef R

View File

@ -57,11 +57,9 @@ enum {
static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat) static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
{ {
if (likely (hb_in_range<hb_codepoint_t> (u, JOINING_TABLE_FIRST, JOINING_TABLE_LAST))) { unsigned int j_type = joining_type(u);
unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
if (likely (j_type != JOINING_TYPE_X)) if (likely (j_type != JOINING_TYPE_X))
return j_type; return j_type;
}
/* Mongolian joining data is not in ArabicJoining.txt yet. */ /* Mongolian joining data is not in ArabicJoining.txt yet. */
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF))) if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1800, 0x18AF)))