diff --git a/src/gen-arabic-joining-table.py b/src/gen-arabic-joining-table.py index f12c2078f..75ea7335a 100755 --- a/src/gen-arabic-joining-table.py +++ b/src/gen-arabic-joining-table.py @@ -11,22 +11,39 @@ for line in sys.stdin: fields = [x.strip() for x in line.split(';')] u = int(fields[0], 16) - if u < 0x0600 or (u > 0x07FF and u != 0x200C and u != 0x200D): + if u == 0x200C or u == 0x200D: + continue + if u < 0x0600: raise Exception ("Ooops, unexpected unicode character: ", fields) dic[u] = fields -print " /*" -print " * The following table is generated by running:" -print " *" -print " * ./gen-arabic-joining-table.py < ArabicShaping.txt" -print " *" -print " * on the ArabicShaping.txt file with the header:" -print " *" +v = dic.keys() +v.sort() +min_u, max_u = v[0], v[-1] +occupancy = len(v) * 100 / (max_u - min_u + 1) + +# Maintain at least 40% occupancy in the table +if occupancy < 40: + raise Exception ("Table too sparse, please investigate: ", occupancy) + +print "/* == Start of generated table == */" +print "/*" +print " * The following table is generated by running:" +print " *" +print " * ./gen-arabic-joining-table.py < ArabicShaping.txt" +print " *" +print " * on the ArabicShaping.txt file with the header:" +print " *" for line in header: - print " * %s" % (line.strip()) -print " */" -print " /* == Start of generated table == */" -for i in range(0x0600, 0x0800): + print " * %s" % (line.strip()) +print " */" + +print "#define JOINING_TABLE_FIRST 0x%04x" % min_u +print "#define JOINING_TABLE_LAST 0x%04x" % max_u +print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =" +print "{" + +for i in range(min_u, max_u + 1): if i not in dic: print " JOINING_TYPE_X, /* %04X */" % i else: @@ -36,4 +53,6 @@ for i in range(0x0600, 0x0800): else: value = "JOINING_TYPE_" + entry[2] print " %s, /* %s */" % (value, '; '.join(entry)) -print " /* == End of generated table == */" +print " JOINING_TYPE_X /* dummy */" +print "};" +print "/* 
== End of generated table == */" diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index a63060c66..63f836b0e 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -56,23 +56,21 @@ enum { */ +/* == Start of generated table == */ /* - * Main joining-type table, covering U+0600..U+07FF. - * Includes Arabic, Syriac, and N'ko. + * The following table is generated by running: + * + * ./gen-arabic-joining-table.py < ArabicShaping.txt + * + * on the ArabicShaping.txt file with the header: + * + * # ArabicShaping-6.1.0.txt + * # Date: 2010-11-09, 12:10:00 PST [KW] */ -static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] = +#define JOINING_TABLE_FIRST 0x0600 +#define JOINING_TABLE_LAST 0x0858 +static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] = { - /* - * The following table is generated by running: - * - * ./gen-arabic-joining-table.py < ArabicShaping.txt - * - * on the ArabicShaping.txt file with the header: - * - * # ArabicShaping-6.0.0.txt - * # Date: 2010-04-30, 13:47:00 PDT [KW] - */ - /* == Start of generated table == */ JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */ JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */ JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */ @@ -585,16 +583,105 @@ static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] = JOINING_TYPE_X, /* 07FD */ JOINING_TYPE_X, /* 07FE */ JOINING_TYPE_X, /* 07FF */ - /* == End of generated table == */ - JOINING_TYPE_X + JOINING_TYPE_X, /* 0800 */ + JOINING_TYPE_X, /* 0801 */ + JOINING_TYPE_X, /* 0802 */ + JOINING_TYPE_X, /* 0803 */ + JOINING_TYPE_X, /* 0804 */ + JOINING_TYPE_X, /* 0805 */ + JOINING_TYPE_X, /* 0806 */ + JOINING_TYPE_X, /* 0807 */ + JOINING_TYPE_X, /* 0808 */ + JOINING_TYPE_X, /* 0809 */ + JOINING_TYPE_X, /* 080A */ + JOINING_TYPE_X, /* 080B */ + JOINING_TYPE_X, /* 080C */ + JOINING_TYPE_X, /* 080D */ 
+ JOINING_TYPE_X, /* 080E */ + JOINING_TYPE_X, /* 080F */ + JOINING_TYPE_X, /* 0810 */ + JOINING_TYPE_X, /* 0811 */ + JOINING_TYPE_X, /* 0812 */ + JOINING_TYPE_X, /* 0813 */ + JOINING_TYPE_X, /* 0814 */ + JOINING_TYPE_X, /* 0815 */ + JOINING_TYPE_X, /* 0816 */ + JOINING_TYPE_X, /* 0817 */ + JOINING_TYPE_X, /* 0818 */ + JOINING_TYPE_X, /* 0819 */ + JOINING_TYPE_X, /* 081A */ + JOINING_TYPE_X, /* 081B */ + JOINING_TYPE_X, /* 081C */ + JOINING_TYPE_X, /* 081D */ + JOINING_TYPE_X, /* 081E */ + JOINING_TYPE_X, /* 081F */ + JOINING_TYPE_X, /* 0820 */ + JOINING_TYPE_X, /* 0821 */ + JOINING_TYPE_X, /* 0822 */ + JOINING_TYPE_X, /* 0823 */ + JOINING_TYPE_X, /* 0824 */ + JOINING_TYPE_X, /* 0825 */ + JOINING_TYPE_X, /* 0826 */ + JOINING_TYPE_X, /* 0827 */ + JOINING_TYPE_X, /* 0828 */ + JOINING_TYPE_X, /* 0829 */ + JOINING_TYPE_X, /* 082A */ + JOINING_TYPE_X, /* 082B */ + JOINING_TYPE_X, /* 082C */ + JOINING_TYPE_X, /* 082D */ + JOINING_TYPE_X, /* 082E */ + JOINING_TYPE_X, /* 082F */ + JOINING_TYPE_X, /* 0830 */ + JOINING_TYPE_X, /* 0831 */ + JOINING_TYPE_X, /* 0832 */ + JOINING_TYPE_X, /* 0833 */ + JOINING_TYPE_X, /* 0834 */ + JOINING_TYPE_X, /* 0835 */ + JOINING_TYPE_X, /* 0836 */ + JOINING_TYPE_X, /* 0837 */ + JOINING_TYPE_X, /* 0838 */ + JOINING_TYPE_X, /* 0839 */ + JOINING_TYPE_X, /* 083A */ + JOINING_TYPE_X, /* 083B */ + JOINING_TYPE_X, /* 083C */ + JOINING_TYPE_X, /* 083D */ + JOINING_TYPE_X, /* 083E */ + JOINING_TYPE_X, /* 083F */ + JOINING_TYPE_R, /* 0840; MANDAIC HALQA; R; No_Joining_Group */ + JOINING_TYPE_D, /* 0841; MANDAIC AB; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0842; MANDAIC AG; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0843; MANDAIC AD; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0844; MANDAIC AH; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0845; MANDAIC USHENNA; D; No_Joining_Group */ + JOINING_TYPE_R, /* 0846; MANDAIC AZ; R; No_Joining_Group */ + JOINING_TYPE_D, /* 0847; MANDAIC IT; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0848; MANDAIC ATT; D; 
No_Joining_Group */ + JOINING_TYPE_R, /* 0849; MANDAIC AKSA; R; No_Joining_Group */ + JOINING_TYPE_D, /* 084A; MANDAIC AK; D; No_Joining_Group */ + JOINING_TYPE_D, /* 084B; MANDAIC AL; D; No_Joining_Group */ + JOINING_TYPE_D, /* 084C; MANDAIC AM; D; No_Joining_Group */ + JOINING_TYPE_D, /* 084D; MANDAIC AN; D; No_Joining_Group */ + JOINING_TYPE_D, /* 084E; MANDAIC AS; D; No_Joining_Group */ + JOINING_TYPE_R, /* 084F; MANDAIC IN; R; No_Joining_Group */ + JOINING_TYPE_D, /* 0850; MANDAIC AP; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0851; MANDAIC ASZ; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0852; MANDAIC AQ; D; No_Joining_Group */ + JOINING_TYPE_D, /* 0853; MANDAIC AR; D; No_Joining_Group */ + JOINING_TYPE_R, /* 0854; MANDAIC ASH; R; No_Joining_Group */ + JOINING_TYPE_D, /* 0855; MANDAIC AT; D; No_Joining_Group */ + JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */ + JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */ + JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */ + JOINING_TYPE_X /* dummy */ }; +/* == End of generated table == */ static unsigned int get_joining_type (hb_codepoint_t u, hb_category_t gen_cat) { /* TODO Macroize the magic bit operations */ - if (likely ((u & ~(0x0600^0x07FF)) == 0x0600)) { - unsigned int j_type = arabic_syriac_nko_joining_types[u - 0x0600]; + if (likely (JOINING_TABLE_FIRST <= u && u <= JOINING_TABLE_LAST)) { + unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST]; if (likely (j_type != JOINING_TYPE_X)) return j_type; }