harfbuzz/src/gen-emoji-table.py

76 lines
1.7 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import sys
import os.path
from collections import OrderedDict
2019-06-26 23:49:15 +02:00
import packTab
# The script takes exactly one command-line argument: the path to
# emoji-data.txt.  With any other argument count, print the usage text to
# stderr and exit with status 1.
if len(sys.argv) != 2:
    usage = """usage: ./gen-emoji-table.py emoji-data.txt
Input file, as of Unicode 12:
* https://www.unicode.org/Public/emoji/12.0/emoji-data.txt"""
    print(usage, file=sys.stderr)
    raise SystemExit(1)
# Parse the input file named on the command line.
#
# Results left at module level for the rest of the script:
#   header -- the first 10 lines of the file, verbatim (echoed later into
#             the generated comment).
#   ranges -- OrderedDict mapping each property name (second ';'-separated
#             field) to a list of (start, end) code point tuples, both ends
#             inclusive, in file order.  A range that begins right after
#             the previous range of the same property is merged into it.
#
# The file is decoded explicitly as UTF-8 rather than with the locale's
# default encoding, and the `with` block guarantees the handle is closed.
with open(sys.argv[1], encoding='utf-8') as f:
    header = [f.readline() for _ in range(10)]

    ranges = OrderedDict()
    for line in f:
        line = line.strip()
        # Skip blank lines and full-line comments.
        if not line or line[0] == '#':
            continue

        # Drop any trailing '#' comment, then take the first two
        # ';'-separated fields: the code point (range) and the property.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

        # "XXXX..YYYY" is a range; a lone "XXXX" is a single code point,
        # stored as a range whose start and end coincide.
        rang = [int(s, 16) for s in rang.split('..')]
        if len(rang) > 1:
            start, end = rang
        else:
            start = end = rang[0]

        runs = ranges.setdefault(typ, [])
        if runs and runs[-1][1] == start - 1:
            # Contiguous with the previous range: extend it in place.
            runs[-1] = (runs[-1][0], end)
        else:
            runs.append((start, end))
# Opening banner of the generated output: a comment recording how the
# table was produced, followed by the captured header lines of the input
# file, then the include guard and the required #include.
for text in (
    "/* == Start of generated table == */",
    "/*",
    " * The following tables are generated by running:",
    " *",
    " * ./gen-emoji-table.py emoji-data.txt",
    " *",
    " * on file with this header:",
    " *",
):
    print(text)

# Each header line is stripped of surrounding whitespace and echoed inside
# the C comment.
for raw in header:
    print(" * %s" % (raw.strip()))

for text in (
    " */",
    "",
    "#ifndef HB_UNICODE_EMOJI_TABLE_HH",
    "#define HB_UNICODE_EMOJI_TABLE_HH",
    "",
    '#include "hb-unicode.hh"',
    "",
):
    print(text)
2019-06-28 20:23:51 +02:00
# Generate the packed lookup code for the Extended_Pictographic property.
# Every other property collected in `ranges` is skipped.
for typ, s in ranges.items():
    if typ != "Extended_Pictographic": continue

    # Expand the ranges into a {code point: 1} mapping.  The stored ranges
    # are inclusive on both ends (a single code point is kept as
    # start == end), so the end code point itself must be included;
    # range(start, end) would drop the last code point of every range and
    # lose single-code-point entries entirely.
    arr = dict()
    for start, end in s:
        for i in range(start, end + 1):
            arr[i] = 1

    # NOTE(review): the second argument (0) is presumably the value for
    # code points absent from `arr` -- confirm against packTab.
    sol = packTab.pack_table(arr, 0, compression=3)
    code = packTab.Code('_hb_emoji')
    sol.genCode(code, 'is_'+typ)
    code.print_c(linkage='static inline')
    print()
# Closing banner: a blank line, the end of the include guard, another
# blank line, and the end-of-generated-table marker.
for text in (
    "",
    "#endif /* HB_UNICODE_EMOJI_TABLE_HH */",
    "",
    "/* == End of generated table == */",
):
    print(text)