From 88e7f37488e4e8590619d815b975232a0c9d2ea0 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 21 Dec 2010 14:18:24 -0500 Subject: [PATCH] Annotate the Arabic joining table with block information --- src/gen-arabic-joining-table.py | 93 ++++++++++++++++---------- src/hb-ot-shape-complex-arabic-table.h | 24 ++++++- 2 files changed, 80 insertions(+), 37 deletions(-) diff --git a/src/gen-arabic-joining-table.py b/src/gen-arabic-joining-table.py index 75ea7335a..08e54db75 100755 --- a/src/gen-arabic-joining-table.py +++ b/src/gen-arabic-joining-table.py @@ -2,29 +2,10 @@ import sys -header = sys.stdin.readline(), sys.stdin.readline() -dic = dict() -for line in sys.stdin: - if line[:1] != '0': - continue +header = sys.stdin.readline (), sys.stdin.readline () +while sys.stdin.readline ().find ('##################') < 0: + pass - fields = [x.strip() for x in line.split(';')] - u = int(fields[0], 16) - - if u == 0x200C or u == 0x200D: - continue - if u < 0x0600: - raise Exception ("Ooops, unexpected unicode character: ", fields) - dic[u] = fields - -v = dic.keys() -v.sort() -min_u, max_u = v[0], v[-1] -occupancy = len(v) * 100 / (max_u - min_u + 1) - -# Maintain at least 40% occupancy in the table */ -if occupancy < 40: - raise Exception ("Table too sparse, please investigate: ", occupancy) print "/* == Start of generated table == */" print "/*" @@ -38,21 +19,65 @@ for line in header: print " * %s" % (line.strip()) print " */" -print "#define JOINING_TABLE_FIRST 0x%04x" % min_u -print "#define JOINING_TABLE_LAST 0x%04x" % max_u -print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =" +print "static const uint8_t joining_table[] =" print "{" -for i in range(min_u, max_u + 1): - if i not in dic: - print " JOINING_TYPE_X, /* %04X */" % i + +min_u = 0x110000 +max_u = 0 +num = 0 +last = -1 +block = '' +for line in sys.stdin: + + if line[0] == '#': + if line.find (" characters"): + block = line[2:].strip () + continue + + fields = [x.strip () for x in line.split (';')] + if len (fields) == 1: + continue + + u = int (fields[0], 16) + if u == 0x200C or u == 0x200D: + continue + if u < last: + raise Exception ("Input data character not sorted", u) + min_u = min (min_u, u) + max_u = max (max_u, u) + num += 1 + + if block: + print "\n /* %s */\n" % block + block = '' + + if last != -1: + last += 1 + while last < u: + print " JOINING_TYPE_X, /* %04X */" % last + last += 1 else: - entry = dic[i] - if entry[3] in ["ALAPH", "DALATH RISH"]: - value = "JOINING_GROUP_" + entry[3].replace(' ', '_') - else: - value = "JOINING_TYPE_" + entry[2] - print " %s, /* %s */" % (value, '; '.join(entry)) + last = u + + if fields[3] in ["ALAPH", "DALATH RISH"]: + value = "JOINING_GROUP_" + fields[3].replace(' ', '_') + else: + value = "JOINING_TYPE_" + fields[2] + print " %s, /* %s */" % (value, '; '.join(fields)) + +print print " JOINING_TYPE_X /* dummy */" print "};" +print + +print "#define JOINING_TABLE_FIRST 0x%04x" % min_u +print "#define JOINING_TABLE_LAST 0x%04x" % max_u +print + print "/* == End of generated table == */" + +occupancy = num * 100 / (max_u - min_u + 1) +# Maintain at least 40% occupancy in the table */ +if occupancy < 40: + raise Exception ("Table too sparse, please investigate: ", occupancy) diff --git a/src/hb-ot-shape-complex-arabic-table.h b/src/hb-ot-shape-complex-arabic-table.h index 861c6d002..523fc84a3 100644 --- a/src/hb-ot-shape-complex-arabic-table.h +++ b/src/hb-ot-shape-complex-arabic-table.h @@ -42,10 +42,11 @@ HB_BEGIN_DECLS * # ArabicShaping-6.1.0.txt * # Date: 2010-11-09, 12:10:00 PST [KW] */ -#define JOINING_TABLE_FIRST 0x0600 -#define JOINING_TABLE_LAST 0x0858 -static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = +static const uint8_t joining_table[] = { + + /* Arabic characters */ + JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */ JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */ JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */ @@ -302,6 +303,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = JOINING_TYPE_X, /* 06FD */ JOINING_TYPE_X, /* 06FE */ JOINING_TYPE_D, /* 06FF; HEH WITH INVERTED V; D; KNOTTED HEH */ + + /* Syriac characters */ + JOINING_TYPE_X, /* 0700 */ JOINING_TYPE_X, /* 0701 */ JOINING_TYPE_X, /* 0702 */ @@ -382,6 +386,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = JOINING_TYPE_R, /* 074D; SOGDIAN ZHAIN; R; ZHAIN */ JOINING_TYPE_D, /* 074E; SOGDIAN KHAPH; D; KHAPH */ JOINING_TYPE_D, /* 074F; SOGDIAN FE; D; FE */ + + /* Arabic supplement characters */ + JOINING_TYPE_D, /* 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH */ JOINING_TYPE_D, /* 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH */ JOINING_TYPE_D, /* 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH */ @@ -430,6 +437,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = JOINING_TYPE_D, /* 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN */ JOINING_TYPE_D, /* 077E; SEEN WITH INVERTED V; D; SEEN */ JOINING_TYPE_D, /* 077F; KAF WITH 2 DOTS ABOVE; D; KAF */ + + /* N'Ko Characters */ + JOINING_TYPE_X, /* 0780 */ JOINING_TYPE_X, /* 0781 */ JOINING_TYPE_X, /* 0782 */ @@ -553,6 +563,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = JOINING_TYPE_X, /* 07F8 */ JOINING_TYPE_X, /* 07F9 */ JOINING_TYPE_C, /* 07FA; NKO LAJANYALAN; C; No_Joining_Group */ + + /* Mandaic Characters */ + JOINING_TYPE_X, /* 07FB */ JOINING_TYPE_X, /* 07FC */ JOINING_TYPE_X, /* 07FD */ @@ -647,8 +660,13 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */ JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */ JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */ + JOINING_TYPE_X /* dummy */ }; + +#define JOINING_TABLE_FIRST 0x0600 +#define JOINING_TABLE_LAST 0x0858 + /* == End of generated table == */ HB_END_DECLS