2020-02-19 12:26:55 +01:00
|
|
|
#!/usr/bin/env python3
|
2018-10-03 17:46:48 +02:00
|
|
|
|
2021-07-29 01:12:46 +02:00
|
|
|
"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
|
2020-05-28 12:31:15 +02:00
|
|
|
|
|
|
|
Input files:
|
|
|
|
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
|
2021-07-29 01:12:46 +02:00
|
|
|
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
|
2020-05-28 12:31:15 +02:00
|
|
|
"""
|
|
|
|
|
2018-10-03 17:46:48 +02:00
|
|
|
import sys
|
|
|
|
from collections import OrderedDict
|
2019-06-26 23:49:15 +02:00
|
|
|
import packTab
|
2018-10-03 17:46:48 +02:00
|
|
|
|
2021-07-29 01:12:46 +02:00
|
|
|
# Exactly two arguments are expected (sys.argv[1] and sys.argv[2] are read
# further down); otherwise exit, using the module docstring as the message.
if not len(sys.argv) == 3:
    sys.exit(__doc__)
|
2018-10-03 17:46:48 +02:00
|
|
|
|
|
|
|
# Parse the first input file (emoji-data.txt).
#
# The first 10 lines are kept verbatim as `header`; they are echoed into the
# generated output later.  The remaining lines are parsed into `ranges`.
#
# The file is decoded as UTF-8 explicitly (Unicode data files are UTF-8 and
# contain non-ASCII characters in their comments) so that parsing does not
# depend on the locale's default encoding, and it is closed as soon as it has
# been read.
with open(sys.argv[1], encoding="utf-8") as f:
    header = [f.readline() for _ in range(10)]
    data_lines = f.readlines()

# Maps property name -> list of inclusive (start, end) code point tuples,
# in the order the properties first appear in the file.
ranges = OrderedDict()
for line in data_lines:
    line = line.strip()
    # Skip blank lines and full-line comments.
    if not line or line[0] == '#':
        continue
    # Drop the trailing comment and keep the first two ';'-separated fields:
    # the code point (or range) and the property name.
    rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

    # "XXXX..YYYY" is a range; a lone "XXXX" is a single code point.
    rang = [int(s, 16) for s in rang.split('..')]
    if len(rang) > 1:
        start, end = rang
    else:
        start = end = rang[0]

    if typ not in ranges:
        ranges[typ] = []
    if ranges[typ] and ranges[typ][-1][1] == start - 1:
        # This entry starts right after the previous one ends: merge them.
        ranges[typ][-1] = (ranges[typ][-1][0], end)
    else:
        ranges[typ].append((start, end))
|
2018-10-03 17:46:48 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Emit the top of the generated header on stdout: a provenance comment that
# records the generating command and the first lines of the input data file,
# followed by the include guard and the required include.
print("/* == Start of generated table == */")
print("/*")
print(" * The following tables are generated by running:")
print(" *")
# The script refuses to run without both input files, so list both here.
print(" * ./gen-emoji-table.py emoji-data.txt emoji-test.txt")
print(" *")
print(" * on file with this header:")
print(" *")
for header_line in header:
    print(" * %s" % (header_line.strip()))
print(" */")
print()
print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
print("#define HB_UNICODE_EMOJI_TABLE_HH")
print()
print('#include "hb-unicode.hh"')
print()
|
|
|
|
|
2019-06-28 20:23:51 +02:00
|
|
|
# Emit a packed lookup table, but only for the Extended_Pictographic property;
# every other property collected in `ranges` is skipped.
for prop, spans in ranges.items():
    if prop != "Extended_Pictographic":
        continue

    # Expand the inclusive ranges into a {code point: 1} mapping.
    members = {
        cp: 1
        for first, last in spans
        for cp in range(first, last + 1)
    }

    # Pack the mapping (default value 0) and print the result as C code.
    solution = packTab.pack_table(members, 0, compression=9)
    emitter = packTab.Code('_hb_emoji')
    solution.genCode(emitter, 'is_' + prop)
    emitter.print_c(linkage='static inline')
    print()
|
2018-10-03 17:46:48 +02:00
|
|
|
|
|
|
|
# Close the include guard and mark the end of the generated output.
for closing_line in (
    "",
    "#endif /* HB_UNICODE_EMOJI_TABLE_HH */",
    "",
    "/* == End of generated table == */",
):
    print(closing_line)
|
2021-07-29 01:12:46 +02:00
|
|
|
|
|
|
|
|
|
|
|
# Generate test file.
#
# Every entry of the second input file (emoji-test.txt) that consists of at
# least two space-separated code points becomes one line of the shaping test
# file written below.
sequences = []
# The input carries literal emoji in its comments; decode it as UTF-8
# explicitly so parsing does not depend on the locale's default encoding.
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the text before the first '#' and, within that, before
        # the first ';' -- i.e. the code point field.
        codepoints = line.partition("#")[0].partition(";")[0].strip().split(" ")
        # Blank lines, comments and single code point entries are skipped.
        if len(codepoints) < 2:
            continue
        sequences.append(codepoints)

# NOTE: the output path is relative to the current working directory.
with open("../test/shape/data/in-house/tests/emoji-clusters.tests", "w", encoding="utf-8") as f:
    for sequence in sequences:
        # One test per sequence: font and options, the comma-joined code
        # points, then one "1=0" expectation per code point.
        f.write("../fonts/AdobeBlank2.ttf;--no-glyph-names --no-positions --font-funcs=ot")
        f.write(";" + ",".join(sequence))
        f.write(";[" + "|".join("1=0" for _ in sequence) + "]\n")
|