harfbuzz/src/gen-arabic-joining-list.py

107 lines
2.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""usage: ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt
Input files:
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""
import os.path, sys
# Exactly two input paths are required; otherwise print the usage text and
# exit with a failure status.
if len (sys.argv) != 3:
    sys.exit (__doc__)

# Open the inputs in command-line order (per the usage text: ArabicShaping.txt
# first, Scripts.txt second).  The handles are kept open at module level
# because the code further down reads the remaining lines from them.
files = [open (x, encoding='utf-8') for x in sys.argv[1:]]

# The first two lines of each input are echoed into the comment at the top of
# the generated output.
headers = [[f.readline (), f.readline ()] for f in files]

# Advance the first input past its preamble, up to and including the first
# line that contains a run of '#' characters.  readline () returns '' at end
# of file, so a file without that marker must be reported as an error rather
# than looped on forever.
while True:
    line = files[0].readline ()
    if not line:
        sys.exit ("%s: no '##################' marker line found" % sys.argv[1])
    if line.find ('##################') >= 0:
        break
def read (f):
    """Parse lines of the form ``XXXX ; value`` or ``XXXX..YYYY ; value``.

    *f* is any iterable of text lines.  Everything from the first '#' on a
    line is discarded, and lines that contain no ';' afterwards are skipped.
    The first field is a hexadecimal code point or an inclusive range of
    them; the second field is the value recorded for every code point in
    that range.

    Returns a dict mapping each int code point to its value string; later
    lines overwrite earlier ones for the same code point.
    """
    table = {}
    for raw in f:
        # Strip the trailing comment, if any.
        content = raw.split ('#', 1)[0]

        parts = [p.strip () for p in content.split (';')]
        if len (parts) == 1:
            # Blank or comment-only line: nothing to record.
            continue

        bounds = parts[0].split ('..')
        first = int (bounds[0], 16)
        # A single code point is a range of length one.
        last = first if len (bounds) == 1 else int (bounds[1], 16)

        value = parts[1]
        table.update ({cp: value for cp in range (first, last + 1)})

    return table
def read_joining_uu (f):
    """Collect the code points of the joining entries listed in *f*.

    *f* is any iterable of text lines with ';'-separated fields.  Lines that
    start with '#' and lines without any ';' are ignored.  For the remaining
    lines the first field is a hexadecimal code point and the third field is
    a type code; entries whose type code is 'T' or 'U' are left out
    (presumably the joining-type column of ArabicShaping.txt -- confirm
    against the data file).

    Returns the selected code points as a sorted list of ints without
    duplicates.  Raises IndexError if a data line has fewer than three
    fields, and ValueError if its first field is not hexadecimal.
    """
    values = set ()
    for line in f:

        # startswith () rather than line[0] so that an empty string is
        # treated as a blank line instead of raising IndexError.
        if line.startswith ('#'):
            continue

        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            # Blank line, or anything else that is not a ';'-separated record.
            continue
        if fields[2] in {'T', 'U'}:
            continue

        values.add (int (fields[0], 16))

    return sorted (values)
def print_has_arabic_joining (scripts, joining_uu):
    """Print the C function ``has_arabic_joining`` to standard output.

    *scripts* maps a code point to its script name and *joining_uu* is an
    iterable of code points.  One ``case HB_SCRIPT_<NAME>:`` label is printed
    for each distinct script of those code points, in sorted order, leaving
    out 'Common' and 'Inherited'.  A blank line follows the closing brace.

    Raises KeyError if a code point in *joining_uu* is missing from
    *scripts*.
    """
    opening = (
        "static bool",
        "has_arabic_joining (hb_script_t script)",
        "{",
        " /* List of scripts that have data in arabic-table. */",
        " switch ((int) script)",
        " {",
    )
    closing = (
        " return true;",
        "",
        " default:",
        " return false;",
        " }",
        "}",
        "",
    )

    for text in opening:
        print (text)

    # Scripts are looked up only after the opening lines have been written.
    not_listed = {'Common', 'Inherited'}
    script_names = {scripts[cp] for cp in joining_uu} - not_listed
    for name in sorted (script_names):
        print (" case HB_SCRIPT_{}:".format (name.upper ()))

    for text in closing:
        print (text)
# Opening marker and the comment describing how the output was generated.
for text in (
    "/* == Start of generated function == */",
    "/*",
    " * The following function is generated by running:",
    " *",
    " * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt",
    " *",
    " * on files with these headers:",
    " *",
):
    print (text)

# Echo the two header lines captured from each input file.
for header_pair in headers:
    for header_line in header_pair:
        print (" * %s" % (header_line.strip ()))

for text in (
    " */",
    "",
    "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH",
    "#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH",
    "",
):
    print (text)

# Parse the second input before the first, then emit the C function.
script_by_codepoint = read (files[1])
joining_codepoints = read_joining_uu (files[0])
print_has_arabic_joining (script_by_codepoint, joining_codepoints)

for text in (
    "",
    "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */",
    "",
    "/* == End of generated function == */",
):
    print (text)