#!/usr/bin/env python3
"""Generate a packed C lookup table from a Unicode ``emoji-data.txt`` file.

The script reads the data file named on the command line, collects the code
point ranges listed for every property, and prints a C header (guarded by
``HB_UNICODE_EMOJI_TABLE_HH``) containing a packTab-generated lookup function
for the ``Extended_Pictographic`` property.
"""

import sys
import os.path
from collections import OrderedDict

# Number of leading lines of the input file echoed into the generated header.
HEADER_LINES = 10


def parse_ranges(lines):
    """Collect the code point ranges of each property from data-file lines.

    Args:
        lines: iterable of text lines in ``<range> ; <property> # comment``
            form, where ``<range>`` is one hex code point or ``first..last``.
            Blank lines and lines starting with ``#`` are skipped.

    Returns:
        An ``OrderedDict`` mapping each property name (in order of first
        appearance) to a list of ``(start, end)`` tuples. Both bounds are
        inclusive; a range that begins right after the previous one for the
        same property is merged into it.
    """
    ranges = OrderedDict()
    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue

        # Drop the trailing comment, then keep the first two ';' fields.
        rang, typ = [s.strip() for s in line.split('#')[0].split(';')[:2]]
        bounds = [int(s, 16) for s in rang.split('..')]
        if len(bounds) > 1:
            start, end = bounds
        else:
            start = end = bounds[0]

        spans = ranges.setdefault(typ, [])
        if spans and spans[-1][1] == start - 1:
            # Adjacent to the previous range: extend it instead of appending.
            spans[-1] = (spans[-1][0], end)
        else:
            spans.append((start, end))
    return ranges


def expand_ranges(spans):
    """Expand inclusive ``(start, end)`` ranges into a ``{code_point: 1}`` dict.

    ``end`` is part of the range, so iteration runs to ``end + 1``. Stopping
    at ``end`` would drop the last code point of every range and lose
    single-code-point entries (where ``start == end``) entirely.
    """
    arr = dict()
    for start, end in spans:
        for i in range(start, end + 1):
            arr[i] = 1
    return arr


def main():
    """Parse ``sys.argv[1]`` and print the generated C header to stdout."""
    if len(sys.argv) != 2:
        print("""usage: ./gen-emoji-table.py emoji-data.txt

Input file, as of Unicode 12:
* https://www.unicode.org/Public/emoji/12.0/emoji-data.txt""", file=sys.stderr)
        sys.exit(1)

    # Imported here so the parsing helpers above can be used without the
    # third-party packTab package being installed.
    import packTab

    # The data file carries literal emoji in its comments, so decode it as
    # UTF-8 rather than relying on the locale default; `with` closes the file.
    with open(sys.argv[1], encoding="utf-8") as f:
        header = [f.readline() for _ in range(HEADER_LINES)]
        ranges = parse_ranges(f)

    print("/* == Start of generated table == */")
    print("/*")
    print(" * The following tables are generated by running:")
    print(" *")
    print(" * ./gen-emoji-table.py emoji-data.txt")
    print(" *")
    print(" * on file with this header:")
    print(" *")
    for l in header:
        print(" * %s" % (l.strip()))
    print(" */")
    print()
    print("#ifndef HB_UNICODE_EMOJI_TABLE_HH")
    print("#define HB_UNICODE_EMOJI_TABLE_HH")
    print()
    print('#include "hb-unicode.hh"')
    print()

    for typ, s in ranges.items():
        # Only the Extended_Pictographic property gets a table.
        if typ != "Extended_Pictographic":
            continue

        arr = expand_ranges(s)
        sol = packTab.pack_table(arr, 0, compression=3)
        code = packTab.Code('_hb_emoji')
        sol.genCode(code, 'is_' + typ)
        code.print_c(linkage='static inline')
        print()

    print()
    print("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
    print()
    print("/* == End of generated table == */")


if __name__ == "__main__":
    main()