harfbuzz/src/gen-emoji-table.py

#!/usr/bin/env python3

import sys
import os.path
from collections import OrderedDict
import packTab

# The generator takes exactly one argument: the path to the emoji data file.
# Anything else prints the usage text to stderr and exits with status 1.
if len (sys.argv) != 2:
	usage = """usage: ./gen-emoji-table.py emoji-data.txt

Input file, as of Unicode 12:
* https://www.unicode.org/Public/emoji/12.0/emoji-data.txt"""
	print (usage, file=sys.stderr)
	sys.exit (1)

# Read the data file named on the command line.  The encoding is given
# explicitly so parsing does not depend on the locale's default encoding,
# and the `with` block guarantees the handle is closed afterwards.
with open(sys.argv[1], encoding='utf-8') as f:
	# The first ten lines are kept verbatim; they are echoed into the banner
	# comment of the generated output further down.
	header = [f.readline () for _ in range(10)]

	# Maps each property name to a list of (start, end) code point ranges,
	# both ends inclusive, in the order they appear in the file.
	ranges = OrderedDict()
	for line in f:
		line = line.strip()
		# Skip blank lines and whole-line comments.
		if not line or line[0] == '#':
			continue
		# Data lines look like "<range> ; <property> # comment": drop the
		# trailing comment, then take the first two ';'-separated fields.
		rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]

		# A field is either "XXXX..YYYY" or a single "XXXX" (hexadecimal).
		rang = [int(s, 16) for s in rang.split('..')]
		if len(rang) > 1:
			start, end = rang
		else:
			start = end = rang[0]

		if typ not in ranges:
			ranges[typ] = []
		# Extend the previous range when this one begins right after it,
		# so adjacent entries collapse into a single range.
		if ranges[typ] and ranges[typ][-1][1] == start - 1:
			ranges[typ][-1] = (ranges[typ][-1][0], end)
		else:
			ranges[typ].append((start, end))


# Banner comment: records how the table was produced and echoes the saved
# header lines of the input file.
for text in (
	"/* == Start of generated table == */",
	"/*",
	" * The following tables are generated by running:",
	" *",
	" *   ./gen-emoji-table.py emoji-data.txt",
	" *",
	" * on file with this header:",
	" *",
):
	print (text)
for raw in header:
	print (" * %s" % (raw.strip()))

# End of the banner, then the include guard and the required include.
# An empty string prints as a blank line.
for text in (
	" */",
	"",
	"#ifndef HB_UNICODE_EMOJI_TABLE_HH",
	"#define HB_UNICODE_EMOJI_TABLE_HH",
	"",
	'#include "hb-unicode.hh"',
	"",
):
	print (text)

# Generate the packed lookup table and accessor for the one property that
# is emitted; every other property parsed from the input is skipped.
for typ, s in ranges.items():
	if typ != "Extended_Pictographic": continue

	# Build a sparse map of code point -> 1 for every member of the property.
	# The parsed ranges are inclusive on both ends (a single code point is
	# stored as start == end), so the upper bound must be end + 1; iterating
	# range(start, end) would drop the last code point of every range and
	# every single-code-point entry.
	arr = dict()
	for start,end in s:
		for i in range(start, end + 1):
			arr[i] = 1

	# Pack the map (0 is passed as the value for unlisted code points) and
	# print the resulting C code with `static inline` linkage.
	sol = packTab.pack_table(arr, 0, compression=3)
	code = packTab.Code('_hb_emoji')
	sol.genCode(code, 'is_'+typ)
	code.print_c(linkage='static inline')
	print()

# Close the include guard and mark the end of the generated section.
# An empty string prints as a blank line.
for text in (
	"",
	"#endif /* HB_UNICODE_EMOJI_TABLE_HH */",
	"",
	"/* == End of generated table == */",
):
	print (text)
Remove python2 support from tests/utils scripts 2020-02-19 12:26:55 +01:00			`#!/usr/bin/env python3`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00
			`import sys`
			`import os.path`
			`from collections import OrderedDict`
[emoji] Port generator to packtab 2019-06-26 23:49:15 +02:00			`import packTab`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00
			`if len (sys.argv) != 2:`
[tools] Print unicode links on gen-* tools output As Behdad's review 2020-02-10 14:49:23 +01:00			`print("""usage: ./gen-emoji-table.py emoji-data.txt`

			`Input file, as of Unicode 12:`
			`* https://www.unicode.org/Public/emoji/12.0/emoji-data.txt""", file=sys.stderr)`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00			`sys.exit (1)`

			`f = open(sys.argv[1])`
			`header = [f.readline () for _ in range(10)]`

Shrink the emoji table by merging adjacent ranges 2018-11-20 21:41:45 +01:00			`ranges = OrderedDict()`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00			`for line in f.readlines():`
			`line = line.strip()`
			`if not line or line[0] == '#':`
			`continue`
			`rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]`

			`rang = [int(s, 16) for s in rang.split('..')]`
			`if len(rang) > 1:`
			`start, end = rang`
			`else:`
			`start = end = rang[0]`

Shrink the emoji table by merging adjacent ranges 2018-11-20 21:41:45 +01:00			`if typ not in ranges:`
			`ranges[typ] = []`
			`if ranges[typ] and ranges[typ][-1][1] == start - 1:`
			`ranges[typ][-1] = (ranges[typ][-1][0], end)`
			`else:`
			`ranges[typ].append((start, end))`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00


			`print ("/* == Start of generated table == */")`
			`print ("/*")`
			`print (" * The following tables are generated by running:")`
			`print (" *")`
			`print (" * ./gen-emoji-table.py emoji-data.txt")`
			`print (" *")`
			`print (" * on file with this header:")`
			`print (" *")`
			`for l in header:`
			`print (" * %s" % (l.strip()))`
			`print (" */")`
			`print ()`
			`print ("#ifndef HB_UNICODE_EMOJI_TABLE_HH")`
			`print ("#define HB_UNICODE_EMOJI_TABLE_HH")`
			`print ()`
			`print ('#include "hb-unicode.hh"')`
			`print ()`

style fix for pylint complain 2019-06-28 20:23:51 +02:00			`for typ, s in ranges.items():`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00			`if typ != "Extended_Pictographic": continue`
[emoji] Port generator to packtab 2019-06-26 23:49:15 +02:00
style fix for pylint complain 2019-06-28 20:23:51 +02:00			`arr = dict()`
			`for start,end in s:`
			`for i in range(start,end):`
			`arr[i] = 1`
[emoji] Port generator to packtab 2019-06-26 23:49:15 +02:00
style fix for pylint complain 2019-06-28 20:23:51 +02:00			`sol = packTab.pack_table(arr, 0, compression=3)`
			`code = packTab.Code('_hb_emoji')`
			`sol.genCode(code, 'is_'+typ)`
			`code.print_c(linkage='static inline')`
			`print()`
[emoji] Add emoji Extended_Pictographic table and function Part of https://github.com/harfbuzz/harfbuzz/issues/1159 . 2018-10-03 17:46:48 +02:00
			`print ()`
			`print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")`
			`print ()`
			`print ("/* == End of generated table == */")`