#!/usr/bin/env python
"""Print summary statistics about selected properties of a UCD XML file.

Usage: ./gen-ucd ucdxml-file

The script loads the file with ``youseedy.load_ucdxml`` and prints how many
distinct values / value combinations occur for the ``gc``, ``ccc``, ``bmg``,
``sc`` and ``dm`` record fields.  The "fit in one byte" lines are fixed
messages printed unconditionally; they are not derived from the counts.
"""

from __future__ import print_function, division, absolute_import

import io
import os.path
import sys


# Code points in [0xAC00, 0xAC00 + 11172) are left out of the decomposition
# statistics.  NOTE(review): this is the Unicode Hangul Syllables range;
# presumably skipped because those decompose algorithmically -- confirm.
HANGUL_SYLLABLE_BASE = 0xAC00
HANGUL_SYLLABLE_COUNT = 11172

# Width of the buckets used when counting how spread out the single-code-point
# decomposition targets are.
DM1_BUCKET_SIZE = 512


def compute_stats(ucd):
    """Return a dict of summary counts for the records in *ucd*.

    *ucd* is a sequence of mappings with the string fields ``gc``, ``ccc``,
    ``sc``, ``bmg``, ``dm`` and ``dt``.  The position of a record in the
    sequence is used as its code point.

    Returned keys:
      gc           -- number of distinct ``gc`` values
      gc_ccc_non0  -- number of distinct (gc, ccc) pairs with non-zero ccc
      gc_bmg_non0  -- number of distinct (gc, bmg delta) pairs with non-zero
                      delta, where delta is ``bmg`` minus the code point
      sc           -- number of distinct ``sc`` values
      dm           -- number of code points with a usable ``dt == 'can'``
                      decomposition
      dm1          -- number of distinct single-code-point decompositions
      dm1_min/max  -- smallest / largest such target (None if there are none)
      dm2          -- number of distinct two-code-point decompositions
      dm2_diffs    -- sorted distinct (second - first) differences of those
                      pairs; computed for ad-hoc inspection, not printed
      dm1_buckets  -- number of distinct DM1_BUCKET_SIZE-wide buckets that
                      contain a single-code-point decomposition target

    Raises ValueError if a selected decomposition has a length other than
    one or two code points.
    """
    gc = [rec['gc'] for rec in ucd]
    ccc = [int(rec['ccc']) for rec in ucd]
    sc = [rec['sc'] for rec in ucd]

    # Store the mirroring glyph as an offset from the code point itself;
    # an empty 'bmg' field means "no mirror" and is recorded as 0.
    bmg = [int(rec['bmg'], 16) - cp if rec['bmg'] else 0
           for cp, rec in enumerate(ucd)]

    hangul_end = HANGUL_SYLLABLE_BASE + HANGUL_SYLLABLE_COUNT
    # '#' in the 'dm' field marks a record without a usable mapping here.
    dm = {cp: tuple(int(v, 16) for v in rec['dm'].split())
          for cp, rec in enumerate(ucd)
          if rec['dm'] != '#' and rec['dt'] == 'can'
          and not (HANGUL_SYLLABLE_BASE <= cp < hangul_end)}

    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    unexpected = set(v for v in dm.values() if len(v) not in (1, 2))
    if unexpected:
        raise ValueError(
            "decomposition mappings must have 1 or 2 code points, got: %r"
            % (sorted(unexpected)[:5],))

    dm1 = set(v[0] for v in dm.values() if len(v) == 1)
    dm2 = set(v for v in dm.values() if len(v) == 2)

    return {
        'gc': len(set(gc)),
        'gc_ccc_non0': len(set((cat, klass)
                               for cat, klass in zip(gc, ccc) if klass)),
        'gc_bmg_non0': len(set((cat, mirr)
                               for cat, mirr in zip(gc, bmg) if mirr)),
        'sc': len(set(sc)),
        'dm': len(dm),
        'dm1': len(dm1),
        # min()/max() raise on an empty set; report None in that case.
        'dm1_min': min(dm1) if dm1 else None,
        'dm1_max': max(dm1) if dm1 else None,
        'dm2': len(dm2),
        'dm2_diffs': sorted(set(second - first for first, second in dm2)),
        'dm1_buckets': len(set(v // DM1_BUCKET_SIZE for v in dm1)),
    }


def main(argv=None):
    """Load the UCD XML file named in *argv* and print its statistics.

    *argv* defaults to ``sys.argv``.  Exits with status 1 after printing a
    usage message to stderr unless exactly one argument is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("usage: ./gen-ucd ucdxml-file", file=sys.stderr)
        sys.exit(1)

    # Imported only after the argument check, matching the original flow, so
    # the usage message does not depend on these modules being installed.
    # packTab is imported but not used by this script yet.
    import youseedy, packTab

    # NOTE(review): assumes load_ucdxml returns one record per code point,
    # ordered by code point -- confirm against youseedy.
    ucd = youseedy.load_ucdxml(argv[1])
    stats = compute_stats(ucd)

    print(stats['gc'])
    print(stats['gc_ccc_non0'])
    print(stats['gc_bmg_non0'])
    print("GC, CCC, and BMG fit in one byte. Compress together.")
    print()

    print(stats['sc'])
    print("SC fits in one byte. Compress separately.")
    print()

    print(stats['dm'])
    print(stats['dm1'], stats['dm1_min'], stats['dm1_max'])
    print(stats['dm2'])
    print(stats['dm1_buckets'])


if __name__ == "__main__":
    main()