Update Arabic joining table to include Mandaic
Mandaic was added in Unicode 6.0, but the joining data was not updated. The draft ArabicShaping.txt for Unicode 6.1 includes the joining data for Mandaic, so regenerate the joining table from that file.
This commit is contained in:
parent
43bf2f7f1e
commit
14d784116b
|
@ -11,22 +11,39 @@ for line in sys.stdin:
|
|||
fields = [x.strip() for x in line.split(';')]
|
||||
u = int(fields[0], 16)
|
||||
|
||||
if u < 0x0600 or (u > 0x07FF and u != 0x200C and u != 0x200D):
|
||||
if u == 0x200C or u == 0x200D:
|
||||
continue
|
||||
if u < 0x0600:
|
||||
raise Exception ("Ooops, unexpected unicode character: ", fields)
|
||||
dic[u] = fields
|
||||
|
||||
print " /*"
|
||||
print " * The following table is generated by running:"
|
||||
print " *"
|
||||
print " * ./gen-arabic-joining-table.py < ArabicShaping.txt"
|
||||
print " *"
|
||||
print " * on the ArabicShaping.txt file with the header:"
|
||||
print " *"
|
||||
v = dic.keys()
|
||||
v.sort()
|
||||
min_u, max_u = v[0], v[-1]
|
||||
occupancy = len(v) * 100 / (max_u - min_u + 1)
|
||||
|
||||
# Maintain at least 40% occupancy in the table
|
||||
if occupancy < 40:
|
||||
raise Exception ("Table too sparse, please investigate: ", occupancy)
|
||||
|
||||
print "/* == Start of generated table == */"
|
||||
print "/*"
|
||||
print " * The following table is generated by running:"
|
||||
print " *"
|
||||
print " * ./gen-arabic-joining-table.py < ArabicShaping.txt"
|
||||
print " *"
|
||||
print " * on the ArabicShaping.txt file with the header:"
|
||||
print " *"
|
||||
for line in header:
|
||||
print " * %s" % (line.strip())
|
||||
print " */"
|
||||
print " /* == Start of generated table == */"
|
||||
for i in range(0x0600, 0x0800):
|
||||
print " * %s" % (line.strip())
|
||||
print " */"
|
||||
|
||||
print "#define JOINING_TABLE_FIRST 0x%04x" % min_u
|
||||
print "#define JOINING_TABLE_LAST 0x%04x" % max_u
|
||||
print "static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] ="
|
||||
print "{"
|
||||
|
||||
for i in range(min_u, max_u + 1):
|
||||
if i not in dic:
|
||||
print " JOINING_TYPE_X, /* %04X */" % i
|
||||
else:
|
||||
|
@ -36,4 +53,6 @@ for i in range(0x0600, 0x0800):
|
|||
else:
|
||||
value = "JOINING_TYPE_" + entry[2]
|
||||
print " %s, /* %s */" % (value, '; '.join(entry))
|
||||
print " /* == End of generated table == */"
|
||||
print " JOINING_TYPE_X /* dummy */"
|
||||
print "};"
|
||||
print "/* == End of generated table == */"
|
||||
|
|
|
@ -56,23 +56,21 @@ enum {
|
|||
*/
|
||||
|
||||
|
||||
/* == Start of generated table == */
|
||||
/*
|
||||
* Main joining-type table, covering U+0600..U+07FF.
|
||||
* Includes Arabic, Syriac, and N'ko.
|
||||
* The following table is generated by running:
|
||||
*
|
||||
* ./gen-arabic-joining-table.py < ArabicShaping.txt
|
||||
*
|
||||
* on the ArabicShaping.txt file with the header:
|
||||
*
|
||||
* # ArabicShaping-6.1.0.txt
|
||||
* # Date: 2010-11-09, 12:10:00 PST [KW]
|
||||
*/
|
||||
static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] =
|
||||
#define JOINING_TABLE_FIRST 0x0600
|
||||
#define JOINING_TABLE_LAST 0x0858
|
||||
static const uint8_t joining_table[JOINING_TABLE_LAST-JOINING_TABLE_FIRST+2] =
|
||||
{
|
||||
/*
|
||||
* The following table is generated by running:
|
||||
*
|
||||
* ./gen-arabic-joining-table.py < ArabicShaping.txt
|
||||
*
|
||||
* on the ArabicShaping.txt file with the header:
|
||||
*
|
||||
* # ArabicShaping-6.0.0.txt
|
||||
* # Date: 2010-04-30, 13:47:00 PDT [KW]
|
||||
*/
|
||||
/* == Start of generated table == */
|
||||
JOINING_TYPE_U, /* 0600; ARABIC NUMBER SIGN; U; No_Joining_Group */
|
||||
JOINING_TYPE_U, /* 0601; ARABIC SIGN SANAH; U; No_Joining_Group */
|
||||
JOINING_TYPE_U, /* 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group */
|
||||
|
@ -585,16 +583,105 @@ static const uint8_t arabic_syriac_nko_joining_types[0x0800 - 0x0600 + 1] =
|
|||
JOINING_TYPE_X, /* 07FD */
|
||||
JOINING_TYPE_X, /* 07FE */
|
||||
JOINING_TYPE_X, /* 07FF */
|
||||
/* == End of generated table == */
|
||||
JOINING_TYPE_X
|
||||
JOINING_TYPE_X, /* 0800 */
|
||||
JOINING_TYPE_X, /* 0801 */
|
||||
JOINING_TYPE_X, /* 0802 */
|
||||
JOINING_TYPE_X, /* 0803 */
|
||||
JOINING_TYPE_X, /* 0804 */
|
||||
JOINING_TYPE_X, /* 0805 */
|
||||
JOINING_TYPE_X, /* 0806 */
|
||||
JOINING_TYPE_X, /* 0807 */
|
||||
JOINING_TYPE_X, /* 0808 */
|
||||
JOINING_TYPE_X, /* 0809 */
|
||||
JOINING_TYPE_X, /* 080A */
|
||||
JOINING_TYPE_X, /* 080B */
|
||||
JOINING_TYPE_X, /* 080C */
|
||||
JOINING_TYPE_X, /* 080D */
|
||||
JOINING_TYPE_X, /* 080E */
|
||||
JOINING_TYPE_X, /* 080F */
|
||||
JOINING_TYPE_X, /* 0810 */
|
||||
JOINING_TYPE_X, /* 0811 */
|
||||
JOINING_TYPE_X, /* 0812 */
|
||||
JOINING_TYPE_X, /* 0813 */
|
||||
JOINING_TYPE_X, /* 0814 */
|
||||
JOINING_TYPE_X, /* 0815 */
|
||||
JOINING_TYPE_X, /* 0816 */
|
||||
JOINING_TYPE_X, /* 0817 */
|
||||
JOINING_TYPE_X, /* 0818 */
|
||||
JOINING_TYPE_X, /* 0819 */
|
||||
JOINING_TYPE_X, /* 081A */
|
||||
JOINING_TYPE_X, /* 081B */
|
||||
JOINING_TYPE_X, /* 081C */
|
||||
JOINING_TYPE_X, /* 081D */
|
||||
JOINING_TYPE_X, /* 081E */
|
||||
JOINING_TYPE_X, /* 081F */
|
||||
JOINING_TYPE_X, /* 0820 */
|
||||
JOINING_TYPE_X, /* 0821 */
|
||||
JOINING_TYPE_X, /* 0822 */
|
||||
JOINING_TYPE_X, /* 0823 */
|
||||
JOINING_TYPE_X, /* 0824 */
|
||||
JOINING_TYPE_X, /* 0825 */
|
||||
JOINING_TYPE_X, /* 0826 */
|
||||
JOINING_TYPE_X, /* 0827 */
|
||||
JOINING_TYPE_X, /* 0828 */
|
||||
JOINING_TYPE_X, /* 0829 */
|
||||
JOINING_TYPE_X, /* 082A */
|
||||
JOINING_TYPE_X, /* 082B */
|
||||
JOINING_TYPE_X, /* 082C */
|
||||
JOINING_TYPE_X, /* 082D */
|
||||
JOINING_TYPE_X, /* 082E */
|
||||
JOINING_TYPE_X, /* 082F */
|
||||
JOINING_TYPE_X, /* 0830 */
|
||||
JOINING_TYPE_X, /* 0831 */
|
||||
JOINING_TYPE_X, /* 0832 */
|
||||
JOINING_TYPE_X, /* 0833 */
|
||||
JOINING_TYPE_X, /* 0834 */
|
||||
JOINING_TYPE_X, /* 0835 */
|
||||
JOINING_TYPE_X, /* 0836 */
|
||||
JOINING_TYPE_X, /* 0837 */
|
||||
JOINING_TYPE_X, /* 0838 */
|
||||
JOINING_TYPE_X, /* 0839 */
|
||||
JOINING_TYPE_X, /* 083A */
|
||||
JOINING_TYPE_X, /* 083B */
|
||||
JOINING_TYPE_X, /* 083C */
|
||||
JOINING_TYPE_X, /* 083D */
|
||||
JOINING_TYPE_X, /* 083E */
|
||||
JOINING_TYPE_X, /* 083F */
|
||||
JOINING_TYPE_R, /* 0840; MANDAIC HALQA; R; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0841; MANDAIC AB; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0842; MANDAIC AG; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0843; MANDAIC AD; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0844; MANDAIC AH; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0845; MANDAIC USHENNA; D; No_Joining_Group */
|
||||
JOINING_TYPE_R, /* 0846; MANDAIC AZ; R; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0847; MANDAIC IT; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0848; MANDAIC ATT; D; No_Joining_Group */
|
||||
JOINING_TYPE_R, /* 0849; MANDAIC AKSA; R; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 084A; MANDAIC AK; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 084B; MANDAIC AL; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 084C; MANDAIC AM; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 084D; MANDAIC AN; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 084E; MANDAIC AS; D; No_Joining_Group */
|
||||
JOINING_TYPE_R, /* 084F; MANDAIC IN; R; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0850; MANDAIC AP; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0851; MANDAIC ASZ; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0852; MANDAIC AQ; D; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0853; MANDAIC AR; D; No_Joining_Group */
|
||||
JOINING_TYPE_R, /* 0854; MANDAIC ASH; R; No_Joining_Group */
|
||||
JOINING_TYPE_D, /* 0855; MANDAIC AT; D; No_Joining_Group */
|
||||
JOINING_TYPE_U, /* 0856; MANDAIC DUSHENNA; U; No_Joining_Group */
|
||||
JOINING_TYPE_U, /* 0857; MANDAIC KAD; U; No_Joining_Group */
|
||||
JOINING_TYPE_U, /* 0858; MANDAIC AIN; U; No_Joining_Group */
|
||||
JOINING_TYPE_X /* dummy */
|
||||
};
|
||||
/* == End of generated table == */
|
||||
|
||||
static unsigned int get_joining_type (hb_codepoint_t u, hb_category_t gen_cat)
|
||||
{
|
||||
/* TODO Macroize the magic bit operations */
|
||||
|
||||
if (likely ((u & ~(0x0600^0x07FF)) == 0x0600)) {
|
||||
unsigned int j_type = arabic_syriac_nko_joining_types[u - 0x0600];
|
||||
if (likely (JOINING_TABLE_FIRST <= u && u <= JOINING_TABLE_LAST)) {
|
||||
unsigned int j_type = joining_table[u - JOINING_TABLE_FIRST];
|
||||
if (likely (j_type != JOINING_TYPE_X))
|
||||
return j_type;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue