harfbuzz/src/gen-indic-table.py

#!/usr/bin/python

import sys

if len (sys.argv) != 4:
	print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
	sys.exit (1)

BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]

files = [file (x) for x in sys.argv[1:]]

headers = [[f.readline () for i in range (2)] for f in files]

data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
	for line in f:

		j = line.find ('#')
		if j >= 0:
			line = line[:j]

		fields = [x.strip () for x in line.split (';')]
		if len (fields) == 1:
			continue

		uu = fields[0].split ('..')
		start = int (uu[0], 16)
		if len (uu) == 1:
			end = start
		else:
			end = int (uu[1], 16)

		t = fields[1]

		for u in range (start, end + 1):
			data[i][u] = t
		values[i][t] = values[i].get (t, 0) + end - start + 1

# Merge data into one dict:
defaults = ('Other', 'Not_Applicable', 'No_Block')
for i,v in enumerate (defaults):
	values[i][v] = values[i].get (v, 0) + 1
combined = {}
for i,d in enumerate (data):
	for u,v in d.items ():
		if i == 2 and not u in combined:
			continue
		if not u in combined:
			combined[u] = list (defaults)
		combined[u][i] = v
combined = {k:v for k,v in combined.items() if v[2] not in BLACKLISTED_BLOCKS}
data = combined
del combined
num = len (data)

for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]:
	if data[u][0] == 'Other':
		data[u][0] = "Vowel_Dependent"

# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {}
for u in [0x00A0, 0x25CC]:
	singles[u] = data[u]
	del data[u]

print "/* == Start of generated table == */"
print "/*"
print " * The following table is generated by running:"
print " *"
print " *   ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"
print " *"
print " * on files with these headers:"
print " *"
for h in headers:
	for l in h:
		print " * %s" % (l.strip())
print " */"
print
print '#include "hb-ot-shape-complex-indic-private.hh"'
print

# Shorten values
short = [{
	"Bindu":		'Bi',
	"Cantillation_Mark":	'Ca',
	"Joiner":		'ZWJ',
	"Non_Joiner":		'ZWNJ',
	"Number":		'Nd',
	"Visarga":		'Vs',
	"Vowel":		'Vo',
	"Vowel_Dependent":	'M',
	"Other":		'x',
},{
	"Not_Applicable":	'x',
}]
all_shorts = [{},{}]

# Add some of the values, to make them more readable, and to avoid duplicates


for i in range (2):
	for v,s in short[i].items ():
		all_shorts[i][s] = v

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
for i in range (2):
	print
	vv = values[i].keys ()
	vv.sort ()
	for v in vv:
		v_no_and = v.replace ('_And_', '_')
		if v in short[i]:
			s = short[i][v]
		else:
			s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])
			if s in all_shorts[i]:
				raise Exception ("Duplicate short value alias", v, all_shorts[i][s])
			all_shorts[i][s] = v
			short[i][v] = s
		print "#define %s_%s	%s_%s	%s/* %3d chars; %s */" % \
			(what_short[i], s, what[i], v.upper (), \
			'	'* ((48-1 - len (what[i]) - 1 - len (v)) / 8), \
			values[i][v], v)
print
print "#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)"
print
print

total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
	global total, used, last_block
	if block and block != last_block:
		print
		print
		print "  /* %s */" % block
	num = 0
	assert start % 8 == 0
	assert (end+1) % 8 == 0
	for u in range (start, end+1):
		if u % 8 == 0:
			print
			print "  /* %04X */" % u,
		if u in data:
			num += 1
		d = data.get (u, defaults)
		sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))

	total += end - start + 1
	used += num
	if block:
		last_block = block

uu = data.keys ()
uu.sort ()

last = -100000
num = 0
offset = 0
starts = []
ends = []
print "static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {"
for u in uu:
	if u <= last:
		continue
	block = data[u][2]

	start = u//8*8
	end = start+1
	while end in uu and block == data[end][2]:
		end += 1
	end = (end-1)//8*8 + 7

	if start != last + 1:
		if start - last <= 1+16*3:
			print_block (None, last+1, start-1, data)
			last = start-1
		else:
			if last >= 0:
				ends.append (last + 1)
				offset += ends[-1] - starts[-1]
			print
			print
			print "#define indic_offset_0x%04x %d" % (start, offset)
			starts.append (start)

	print_block (block, start, end, data)
	last = end
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print
print
occupancy = used * 100. / total
page_bits = 12
print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)
print
print "INDIC_TABLE_ELEMENT_TYPE"
print "hb_indic_get_categories (hb_codepoint_t u)"
print "{"
print "  switch (u >> %d)" % page_bits
print "  {"
pages = set([u>>page_bits for u in starts+ends+singles.keys()])
for p in sorted(pages):
	print "    case 0x%0X:" % p
	for (start,end) in zip (starts, ends):
		if p not in [start>>page_bits, end>>page_bits]: continue
		offset = "indic_offset_0x%04x" % start
		print "      if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end, start, offset)
	for u,d in singles.items ():
		if p != u>>page_bits: continue
		print "      if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])
	print "      break;"
	print ""
print "    default:"
print "      break;"
print "  }"
print "  return _(x,x);"
print "}"
print
print "#undef _"
for i in range (2):
	print
	vv = values[i].keys ()
	vv.sort ()
	for v in vv:
		print "#undef %s_%s" % \
			(what_short[i], short[i][v])
print
print "/* == End of generated table == */"

# Maintain at least 30% occupancy in the table */
if occupancy < 30:
	raise Exception ("Table too sparse, please investigate: ", occupancy)
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`#!/usr/bin/python`

			`import sys`

Generate fallback Arabic shaping table Not hooked up yet. 2012-04-10 22:25:08 +02:00			`if len (sys.argv) != 4:`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`print >>sys.stderr, "usage: ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"`
			`sys.exit (1)`

[indic-table] Black-list Thai, Lao, and Tibetan We don't need Indic table for those. 2014-06-20 21:25:30 +02:00			`BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]`

Generate fallback Arabic shaping table Not hooked up yet. 2012-04-10 22:25:08 +02:00			`files = [file (x) for x in sys.argv[1:]]`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`headers = [[f.readline () for i in range (2)] for f in files]`

			`data = [{} for f in files]`
			`values = [{} for f in files]`
			`for i, f in enumerate (files):`
			`for line in f:`

			`j = line.find ('#')`
			`if j >= 0:`
			`line = line[:j]`
Whitespace 2011-09-20 20:34:06 +02:00
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`fields = [x.strip () for x in line.split (';')]`
			`if len (fields) == 1:`
			`continue`

			`uu = fields[0].split ('..')`
			`start = int (uu[0], 16)`
			`if len (uu) == 1:`
			`end = start`
			`else:`
			`end = int (uu[1], 16)`

			`t = fields[1]`

			`for u in range (start, end + 1):`
			`data[i][u] = t`
[indic-table] Fix category frequency counts in comments 2014-06-30 20:30:54 +02:00			`values[i][t] = values[i].get (t, 0) + end - start + 1`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`# Merge data into one dict:`
			`defaults = ('Other', 'Not_Applicable', 'No_Block')`
			`for i,v in enumerate (defaults):`
			`values[i][v] = values[i].get (v, 0) + 1`
			`combined = {}`
			`for i,d in enumerate (data):`
			`for u,v in d.items ():`
			`if i == 2 and not u in combined:`
			`continue`
			`if not u in combined:`
			`combined[u] = list (defaults)`
			`combined[u][i] = v`
[indic-table] Black-list Thai, Lao, and Tibetan We don't need Indic table for those. 2014-06-20 21:25:30 +02:00			`combined = {k:v for k,v in combined.items() if v[2] not in BLACKLISTED_BLOCKS}`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`data = combined`
			`del combined`
			`num = len (data)`

[indic-table] Update to Unicode 7.0 data Touch code just enough to preserve previous syllable structure and functionality as closely as possible. Many further cleanups coming later. 2014-06-30 21:24:02 +02:00			`for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]:`
			`if data[u][0] == 'Other':`
			`data[u][0] = "Vowel_Dependent"`

[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out`
			`singles = {}`
			`for u in [0x00A0, 0x25CC]:`
			`singles[u] = data[u]`
			`del data[u]`

			`print "/* == Start of generated table == */"`
			`print "/*"`
			`print " * The following table is generated by running:"`
			`print " *"`
			`print " * ./gen-indic-table.py IndicSyllabicCategory.txt IndicMatraCategory.txt Blocks.txt"`
			`print " *"`
			`print " * on files with these headers:"`
			`print " *"`
			`for h in headers:`
			`for l in h:`
			`print " * %s" % (l.strip())`
			`print " */"`
Make src tests pass again 2012-03-07 21:33:14 +01:00			`print`
Add South-East Asian shaper Handles Tai Tham, Cham, and New Tai Lue for now. 2013-02-12 18:14:10 +01:00			`print '#include "hb-ot-shape-complex-indic-private.hh"'`
Make src tests pass again 2012-03-07 21:33:14 +01:00			`print`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`# Shorten values`
			`short = [{`
			`"Bindu": 'Bi',`
Add new IndicSyllabicCategory short forms for Unicode 7.0 2014-06-22 19:32:13 +02:00			`"Cantillation_Mark": 'Ca',`
			`"Joiner": 'ZWJ',`
			`"Non_Joiner": 'ZWNJ',`
			`"Number": 'Nd',`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`"Visarga": 'Vs',`
			`"Vowel": 'Vo',`
Cosmetic 2011-06-13 22:02:18 +02:00			`"Vowel_Dependent": 'M',`
			`"Other": 'x',`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`},{`
Cosmetic 2011-06-13 22:02:18 +02:00			`"Not_Applicable": 'x',`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`}]`
[indic-table-gen] Minor 2014-06-21 23:31:10 +02:00			`all_shorts = [{},{}]`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`# Add some of the values, to make them more readable, and to avoid duplicates`


			`for i in range (2):`
			`for v,s in short[i].items ():`
[indic-table-gen] Minor 2014-06-21 23:31:10 +02:00			`all_shorts[i][s] = v`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]`
			`what_short = ["ISC", "IMC"]`
			`for i in range (2):`
			`print`
			`vv = values[i].keys ()`
			`vv.sort ()`
			`for v in vv:`
			`v_no_and = v.replace ('_And_', '_')`
			`if v in short[i]:`
			`s = short[i][v]`
			`else:`
			`s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])`
			`if s in all_shorts[i]:`
[indic-table-gen] Minor 2014-06-21 23:31:10 +02:00			`raise Exception ("Duplicate short value alias", v, all_shorts[i][s])`
			`all_shorts[i][s] = v`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`short[i][v] = s`
			`print "#define %s_%s %s_%s %s/* %3d chars; %s */" % \`
			`(what_short[i], s, what[i], v.upper (), \`
			`' '* ((48-1 - len (what[i]) - 1 - len (v)) / 8), \`
			`values[i][v], v)`
			`print`
			`print "#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)"`
			`print`
			`print`

Update Indic table to Unicode 6.1 data 2012-03-07 18:08:33 +01:00			`total = 0`
			`used = 0`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00			`last_block = None`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`def print_block (block, start, end, data):`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00			`global total, used, last_block`
			`if block and block != last_block:`
			`print`
			`print`
			`print " /* %s */" % block`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`num = 0`
[indic-table] Remove block range from data table No functional change. 2014-06-20 20:41:39 +02:00			`assert start % 8 == 0`
			`assert (end+1) % 8 == 0`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`for u in range (start, end+1):`
			`if u % 8 == 0:`
			`print`
			`print " /* %04X */" % u,`
			`if u in data:`
			`num += 1`
			`d = data.get (u, defaults)`
Cosmetic 2011-06-13 22:02:18 +02:00			`sys.stdout.write ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])))`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
Update Indic table to Unicode 6.1 data 2012-03-07 18:08:33 +01:00			`total += end - start + 1`
			`used += num`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00			`if block:`
			`last_block = block`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`uu = data.keys ()`
			`uu.sort ()`

Minor 2014-06-22 19:29:59 +02:00			`last = -100000`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`num = 0`
[Indic] Generate a single data table instead of multiple ones 2011-06-28 20:03:29 +02:00			`offset = 0`
			`starts = []`
			`ends = []`
			`print "static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {"`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`for u in uu:`
			`if u <= last:`
			`continue`
			`block = data[u][2]`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00
			`start = u//8*8`
			`end = start+1`
[indic-table] Minor No output change. 2014-06-20 22:47:43 +02:00			`while end in uu and block == data[end][2]:`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00			`end += 1`
			`end = (end-1)//8*8 + 7`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`if start != last + 1:`
[indic-table] Make table more compact by not covering full blocks -#define indic_offset_total 4416 +#define indic_offset_total 3816 -}; /* Table occupancy: 60% / +}; / Table occupancy: 69% */ 2014-06-20 20:56:22 +02:00			`if start - last <= 1+16*3:`
			`print_block (None, last+1, start-1, data)`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`last = start-1`
			`else:`
			`if last >= 0:`
[Indic] Generate a single data table instead of multiple ones 2011-06-28 20:03:29 +02:00			`ends.append (last + 1)`
			`offset += ends[-1] - starts[-1]`
			`print`
			`print`
			`print "#define indic_offset_0x%04x %d" % (start, offset)`
			`starts.append (start)`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00
			`print_block (block, start, end, data)`
			`last = end`
[Indic] Generate a single data table instead of multiple ones 2011-06-28 20:03:29 +02:00			`ends.append (last + 1)`
			`offset += ends[-1] - starts[-1]`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`print`
			`print`
Update Indic table to Unicode 6.1 data 2012-03-07 18:08:33 +01:00			`occupancy = used * 100. / total`
[indic-table] Minor 2014-06-21 00:01:34 +02:00			`page_bits = 12`
[indic-table] Fix compiler warning 2014-06-20 20:58:53 +02:00			`print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`print`
Add South-East Asian shaper Handles Tai Tham, Cham, and New Tai Lue for now. 2013-02-12 18:14:10 +01:00			`print "INDIC_TABLE_ELEMENT_TYPE"`
			`print "hb_indic_get_categories (hb_codepoint_t u)"`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`print "{"`
[indic-table] Minor 2014-06-21 00:01:34 +02:00			`print " switch (u >> %d)" % page_bits`
[indic-table] Speed up lookup 2014-06-20 21:12:49 +02:00			`print " {"`
[indic-table] Minor 2014-06-21 00:01:34 +02:00			`pages = set([u>>page_bits for u in starts+ends+singles.keys()])`
[indic-table] Make output stable 2014-06-20 23:57:03 +02:00			`for p in sorted(pages):`
[indic-table] Speed up lookup 2014-06-20 21:12:49 +02:00			`print " case 0x%0X:" % p`
			`for (start,end) in zip (starts, ends):`
[indic-table] Minor 2014-06-21 00:01:34 +02:00			`if p not in [start>>page_bits, end>>page_bits]: continue`
[indic-table] Speed up lookup 2014-06-20 21:12:49 +02:00			`offset = "indic_offset_0x%04x" % start`
			`print " if (0x%04X <= u && u <= 0x%04X) return indic_table[u - 0x%04X + %s];" % (start, end, start, offset)`
			`for u,d in singles.items ():`
[indic-table] Minor 2014-06-21 00:01:34 +02:00			`if p != u>>page_bits: continue`
[indic-table] Speed up lookup 2014-06-20 21:12:49 +02:00			`print " if (unlikely (u == 0x%04X)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])`
			`print " break;"`
			`print ""`
			`print " default:"`
			`print " break;"`
			`print " }"`
Cosmetic 2011-06-13 22:02:18 +02:00			`print " return _(x,x);"`
[Indic] Start an Indic shaper Nothing functional in there yet. So far, we're parsing IndicSyllabicCategory.txt and IndicMatraCategory.txt fils from Unicode Character Database and store them in an array to be used by the shaper. Also hooked up the shaper, but it does not do anything right now. 2011-06-02 23:43:12 +02:00			`print "}"`
			`print`
			`print "#undef _"`
			`for i in range (2):`
			`print`
			`vv = values[i].keys ()`
			`vv.sort ()`
			`for v in vv:`
			`print "#undef %s_%s" % \`
			`(what_short[i], short[i][v])`
			`print`
			`print "/* == End of generated table == */"`
Update Indic table to Unicode 6.1 data 2012-03-07 18:08:33 +01:00
			`# Maintain at least 30% occupancy in the table */`
			`if occupancy < 30:`
			`raise Exception ("Table too sparse, please investigate: ", occupancy)`