Annotate the Arabic joining table with block information
This commit is contained in:
parent
1482a39e56
commit
88e7f37488
|
@ -2,29 +2,10 @@
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
header = sys.stdin.readline(), sys.stdin.readline()
|
header = sys.stdin.readline (), sys.stdin.readline ()
|
||||||
dic = dict()
|
while sys.stdin.readline ().find ('##################') < 0:
|
||||||
for line in sys.stdin:
|
pass
|
||||||
if line[:1] != '0':
|
|
||||||
continue
|
|
||||||
|
|
||||||
fields = [x.strip() for x in line.split(';')]
|
|
||||||
u = int(fields[0], 16)
|
|
||||||
|
|
||||||
if u == 0x200C or u == 0x200D:
|
|
||||||
continue
|
|
||||||
if u < 0x0600:
|
|
||||||
raise Exception ("Ooops, unexpected unicode character: ", fields)
|
|
||||||
dic[u] = fields
|
|
||||||
|
|
||||||
v = dic.keys()
|
|
||||||
v.sort()
|
|
||||||
min_u, max_u = v[0], v[-1]
|
|
||||||
occupancy = len(v) * 100 / (max_u - min_u + 1)
|
|
||||||
|
|
||||||
# Maintain at least 40% occupancy in the table
|
|
||||||
if occupancy < 40:
|
|
||||||
raise Exception ("Table too sparse, please investigate: ", occupancy)
|
|
||||||
|
|
||||||
print "/* == Start of generated table == */"
|
print "/* == Start of generated table == */"
|
||||||
print "/*"
|
print "/*"
|
||||||
|
@ -38,21 +19,65 @@ for line in header:
|
||||||
print " * %s" % (line.strip())
|
print " * %s" % (line.strip())
|
||||||
print " */"
|
print " */"
|
||||||
|
|
||||||
print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
|
print "static const uint8_t joining_table[] ="
|
||||||
print "#define JOINING_TABLE_LAST 0x%04x" % max_u
|
|
||||||
print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] ="
|
|
||||||
print "{"
|
print "{"
|
||||||
|
|
||||||
for i in range(min_u, max_u + 1):
|
|
||||||
if i not in dic:
|
min_u = 0x110000
|
||||||
print " JOINING_TYPE_X, /* %04X */" % i
|
max_u = 0
|
||||||
|
num = 0
|
||||||
|
last = -1
|
||||||
|
block = ''
|
||||||
|
for line in sys.stdin:
|
||||||
|
|
||||||
|
if line[0] == '#':
|
||||||
|
if line.find (" characters"):
|
||||||
|
block = line[2:].strip ()
|
||||||
|
continue
|
||||||
|
|
||||||
|
fields = [x.strip () for x in line.split (';')]
|
||||||
|
if len (fields) == 1:
|
||||||
|
continue
|
||||||
|
|
||||||
|
u = int (fields[0], 16)
|
||||||
|
if u == 0x200C or u == 0x200D:
|
||||||
|
continue
|
||||||
|
if u < last:
|
||||||
|
raise Exception ("Input data character not sorted", u)
|
||||||
|
min_u = min (min_u, u)
|
||||||
|
max_u = max (max_u, u)
|
||||||
|
num += 1
|
||||||
|
|
||||||
|
if block:
|
||||||
|
print "\n /* %s */\n" % block
|
||||||
|
block = ''
|
||||||
|
|
||||||
|
if last != -1:
|
||||||
|
last += 1
|
||||||
|
while last < u:
|
||||||
|
print " JOINING_TYPE_X, /* %04X */" % last
|
||||||
|
last += 1
|
||||||
else:
|
else:
|
||||||
entry = dic[i]
|
last = u
|
||||||
if entry[3] in ["ALAPH", "DALATH RISH"]:
|
|
||||||
value = "JOINING_GROUP_" + entry[3].replace(' ', '_')
|
if fields[3] in ["ALAPH", "DALATH RISH"]:
|
||||||
|
value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
|
||||||
else:
|
else:
|
||||||
value = "JOINING_TYPE_" + entry[2]
|
value = "JOINING_TYPE_" + fields[2]
|
||||||
print " %s, /* %s */" % (value, '; '.join(entry))
|
print " %s, /* %s */" % (value, '; '.join(fields))
|
||||||
|
|
||||||
|
print
|
||||||
print " JOINING_TYPE_X /* dummy */"
|
print " JOINING_TYPE_X /* dummy */"
|
||||||
print "};"
|
print "};"
|
||||||
|
print
|
||||||
|
|
||||||
|
print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
|
||||||
|
print "#define JOINING_TABLE_LAST 0x%04x" % max_u
|
||||||
|
print
|
||||||
|
|
||||||
print "/* == End of generated table == */"
|
print "/* == End of generated table == */"
|
||||||
|
|
||||||
|
occupancy = num * 100 / (max_u - min_u + 1)
|
||||||
|
# Maintain at least 40% occupancy in the table
|
||||||
|
if occupancy < 40:
|
||||||
|
raise Exception ("Table too sparse, please investigate: ", occupancy)
|
||||||
|
|
|
@ -42,10 +42,11 @@ HB_BEGIN_DECLS
|
||||||
* # ArabicShaping-6.1.0.txt
|
* # ArabicShaping-6.1.0.txt
|
||||||
* # Date: 2010-11-09, 12:10:00 PST [KW]
|
* # Date: 2010-11-09, 12:10:00 PST [KW]
|
||||||
*/
|
*/
|
||||||
#define JOINING_TABLE_FIRST 0x0600
|
static const uint8_t joining_table[] =
|
||||||
#define JOINING_TABLE_LAST 0x0858
|
|
||||||
static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/* Arabic characters */
|
||||||
|
|
||||||
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
|
||||||
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
|
||||||
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
|
||||||
|
@ -302,6 +303,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||||
JOINING_TYPE_X, /* 06FD */
|
JOINING_TYPE_X, /* 06FD */
|
||||||
JOINING_TYPE_X, /* 06FE */
|
JOINING_TYPE_X, /* 06FE */
|
||||||
JOINING_TYPE_D, /* 06FF; HEH WITH INVERTED V; D; KNOTTED HEH */
|
JOINING_TYPE_D, /* 06FF; HEH WITH INVERTED V; D; KNOTTED HEH */
|
||||||
|
|
||||||
|
/* Syriac characters */
|
||||||
|
|
||||||
JOINING_TYPE_X, /* 0700 */
|
JOINING_TYPE_X, /* 0700 */
|
||||||
JOINING_TYPE_X, /* 0701 */
|
JOINING_TYPE_X, /* 0701 */
|
||||||
JOINING_TYPE_X, /* 0702 */
|
JOINING_TYPE_X, /* 0702 */
|
||||||
|
@ -382,6 +386,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||||
JOINING_TYPE_R, /* 074D; SOGDIAN ZHAIN; R; ZHAIN */
|
JOINING_TYPE_R, /* 074D; SOGDIAN ZHAIN; R; ZHAIN */
|
||||||
JOINING_TYPE_D, /* 074E; SOGDIAN KHAPH; D; KHAPH */
|
JOINING_TYPE_D, /* 074E; SOGDIAN KHAPH; D; KHAPH */
|
||||||
JOINING_TYPE_D, /* 074F; SOGDIAN FE; D; FE */
|
JOINING_TYPE_D, /* 074F; SOGDIAN FE; D; FE */
|
||||||
|
|
||||||
|
/* Arabic supplement characters */
|
||||||
|
|
||||||
JOINING_TYPE_D, /* 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH */
|
JOINING_TYPE_D, /* 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH */
|
||||||
JOINING_TYPE_D, /* 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH */
|
JOINING_TYPE_D, /* 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH */
|
||||||
JOINING_TYPE_D, /* 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH */
|
JOINING_TYPE_D, /* 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH */
|
||||||
|
@ -430,6 +437,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||||
JOINING_TYPE_D, /* 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN */
|
JOINING_TYPE_D, /* 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN */
|
||||||
JOINING_TYPE_D, /* 077E; SEEN WITH INVERTED V; D; SEEN */
|
JOINING_TYPE_D, /* 077E; SEEN WITH INVERTED V; D; SEEN */
|
||||||
JOINING_TYPE_D, /* 077F; KAF WITH 2 DOTS ABOVE; D; KAF */
|
JOINING_TYPE_D, /* 077F; KAF WITH 2 DOTS ABOVE; D; KAF */
|
||||||
|
|
||||||
|
/* N'Ko Characters */
|
||||||
|
|
||||||
JOINING_TYPE_X, /* 0780 */
|
JOINING_TYPE_X, /* 0780 */
|
||||||
JOINING_TYPE_X, /* 0781 */
|
JOINING_TYPE_X, /* 0781 */
|
||||||
JOINING_TYPE_X, /* 0782 */
|
JOINING_TYPE_X, /* 0782 */
|
||||||
|
@ -553,6 +563,9 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||||
JOINING_TYPE_X, /* 07F8 */
|
JOINING_TYPE_X, /* 07F8 */
|
||||||
JOINING_TYPE_X, /* 07F9 */
|
JOINING_TYPE_X, /* 07F9 */
|
||||||
JOINING_TYPE_C, /* 07FA; NKO LAJANYALAN; C; No_Joining_Group */
|
JOINING_TYPE_C, /* 07FA; NKO LAJANYALAN; C; No_Joining_Group */
|
||||||
|
|
||||||
|
/* Mandaic Characters */
|
||||||
|
|
||||||
JOINING_TYPE_X, /* 07FB */
|
JOINING_TYPE_X, /* 07FB */
|
||||||
JOINING_TYPE_X, /* 07FC */
|
JOINING_TYPE_X, /* 07FC */
|
||||||
JOINING_TYPE_X, /* 07FD */
|
JOINING_TYPE_X, /* 07FD */
|
||||||
|
@ -647,8 +660,13 @@ static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||||
JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
|
||||||
JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
|
||||||
JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
|
JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
|
||||||
|
|
||||||
JOINING_TYPE_X /* dummy */
|
JOINING_TYPE_X /* dummy */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define JOINING_TABLE_FIRST 0x0600
|
||||||
|
#define JOINING_TABLE_LAST 0x0858
|
||||||
|
|
||||||
/* == End of generated table == */
|
/* == End of generated table == */
|
||||||
|
|
||||||
HB_END_DECLS
|
HB_END_DECLS
|
||||||
|
|
Loading…
Reference in New Issue