fontconfig/fc-lang/fc-lang.py

#!/usr/bin/env python3
#
# fontconfig/fc-lang/fc-lang.py
#
# Copyright © 2001-2002 Keith Packard
# Copyright © 2019 Tim-Philipp Müller
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of the author(s) not be used in
# advertising or publicity pertaining to distribution of the software without
# specific, written prior permission.  The authors make no
# representations about the suitability of this software for any purpose.  It
# is provided "as is" without express or implied warranty.
#
# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

# fc-lang
#
# Read a set of language orthographies and build C declarations for
# charsets which can then be used to identify which languages are
# supported by a given font.
#
# TODO: this code is not very pythonic, a lot of it is a 1:1 translation
# of the C code and we could probably simplify it a bit
import argparse
import string
import sys
import os

# we just store the leaves in a dict, we can order the leaves later if needed
class CharSet:
    """Sparse set of code points kept as per-256-code-point bitmap leaves.

    ``leaves`` maps a leaf number (``ucs4 >> 8``) to a list of eight
    integers; each integer is used as a 32-bit mask, one bit per code point.
    """

    def __init__(self):
        # leaf_number -> leaf data (8 words of 32 bits = 256 bits)
        self.leaves = {}

    def add_char(self, ucs4):
        """Set the bit for code point *ucs4*, creating its leaf on demand."""
        assert ucs4 < 0x01000000
        page = ucs4 >> 8
        low_byte = ucs4 & 0xff
        words = self.leaves.setdefault(page, [0] * 8)  # 256/32 = 8
        words[low_byte >> 5] |= 1 << (low_byte & 0x1f)

    def del_char(self, ucs4):
        """Clear the bit for code point *ucs4* if its leaf exists.

        A leaf that becomes all-zero is kept; nothing is created for a
        code point whose leaf was never added.
        """
        assert ucs4 < 0x01000000
        page = ucs4 >> 8
        if page not in self.leaves:
            return
        low_byte = ucs4 & 0xff
        self.leaves[page][low_byte >> 5] &= ~(1 << (low_byte & 0x1f))

    def equals(self, other_cs):
        """Return True when both charsets have the same leaf numbers and data."""
        mine = sorted(self.leaves)
        theirs = sorted(other_cs.leaves)
        if len(mine) != len(theirs):
            return False
        # Walk both key lists in order; stop at the first mismatch.
        return all(
            a == b and leaves_equal(self.leaves[a], other_cs.leaves[b])
            for a, b in zip(mine, theirs)
        )

# Convert a file name into a name suitable for C declarations
def get_name(file_name):
    """Return the part of *file_name* that precedes its first '.'."""
    stem, _dot, _rest = file_name.partition('.')
    return stem

# Convert a C name into a language name
def get_lang(c_name):
    """Turn a C-style name into a language tag.

    Underscores become dashes, spaces are dropped, and the result is
    lower-cased.
    """
    substitutions = {ord('_'): '-', ord(' '): None}
    return c_name.translate(substitutions).lower()

def read_orth_file(file_name):
    """Read an orthography file and return its meaningful lines.

    Lines starting with ``include `` are replaced by the lines of the named
    file (read recursively, path resolved against the current directory).
    For every other line, anything from the first ``#`` or the first tab
    onwards is dropped and surrounding whitespace is stripped; lines that
    end up empty are skipped.

    Returns a list of ``(file_name, line_number, text)`` tuples, where
    ``line_number`` is the 1-based line number within ``file_name``.
    """
    lines = []
    with open(file_name, 'r', encoding='utf-8') as orth_file:
        # Count from 1 so that reported line numbers match what an editor
        # shows (they are used in parse error messages).
        for num, line in enumerate(orth_file, start=1):
            if line.startswith('include '):
                include_fn = line[len('include '):].strip()
                lines.extend(read_orth_file(include_fn))
                continue

            # remove comments and strip whitespaces
            line = line.split('#')[0].strip()
            line = line.split('\t')[0].strip()
            # skip empty lines
            if line:
                lines.append((file_name, num, line))

    return lines

def leaves_equal(leaf1, leaf2):
    """Return True when the two leaves hold the same words, pairwise.

    Comparison runs over ``zip(leaf1, leaf2)``, so only the common prefix
    is examined if the lengths differ.
    """
    return not any(a != b for a, b in zip(leaf1, leaf2))

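# Build a single charset from a source file
#
# The file format is quite simple: each line is either a single hex value
# or a pair of hex values separated with a dash or '..' (an inclusive
# range). A leading dash removes the value or range from the charset.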
def parse_orth_file(file_name, lines):
    """Build a CharSet from the pre-read lines of an orthography file.

    ``lines`` is a list of ``(file_name, line_number, text)`` tuples as
    produced by read_orth_file(). Each text is a single hex value or a
    pair of hex values separated by ``-`` or ``..`` (an inclusive range).
    A leading ``-`` removes the value or range instead of adding it.

    ``file_name`` is accepted for interface compatibility; diagnostics use
    the per-line file name stored in ``lines``.

    Returns the resulting CharSet. Raises ValueError if a value is not
    valid hexadecimal.
    """
    charset = CharSet()
    for fn, num, line in lines:
        delete_char = line.startswith('-')
        if delete_char:
            line = line[1:]
        if '-' in line:
            parts = line.split('-')
        elif '..' in line:
            parts = line.split('..')
        else:
            parts = [line]

        start = int(parts.pop(0), 16)
        end = int(parts.pop(0), 16) if parts else start
        if parts:
            # Report on stderr: the script rebinds sys.stdout to the generated
            # output file, and the diagnostic must not end up in there.
            print('ERROR: {} line {}: parse error (too many parts)'.format(fn, num),
                  file=sys.stderr)

        update = charset.del_char if delete_char else charset.add_char
        for ucs4 in range(start, end + 1):
            update(ucs4)

    assert charset.equals(charset) # sanity check for the equals function

    return charset

# Script entry point: read every .orth file, build the charsets and print
# the generated C tables between the two halves of the template (split at
# the '@@@' marker line).
if __name__=='__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('orth_files', nargs='+', help='List of .orth files')
    parser.add_argument('--directory', dest='directory', default=None)
    parser.add_argument('--template', dest='template_file', default=None)
    parser.add_argument('--output', dest='output_file', default=None)

    args = parser.parse_args()

    sets = []
    names = []
    langs = []
    country = []

    total_leaves = 0

    LangCountrySets = {}

    # Open output file
    if args.output_file:
        sys.stdout = open(args.output_file, 'w', encoding='utf-8')

    # Read the template file
    if args.template_file:
        tmpl_file = open(args.template_file, 'r', encoding='utf-8')
    else:
        tmpl_file = sys.stdin

    # Change into source dir if specified (after opening other files)
    if args.directory:
        os.chdir(args.directory)

    # file name -> index on the command line
    orth_entries = {}
    for i, fn in enumerate(args.orth_files):
        orth_entries[fn] = i

    # Charsets are emitted in sorted file-name order; remember that order so
    # later tables can map back to the files without rebuilding their names.
    sorted_files = sorted(orth_entries)
    sorted_index = {fn: i for i, fn in enumerate(sorted_files)}

    for fn in sorted_files:
        lines = read_orth_file(fn)
        charset = parse_orth_file(fn, lines)

        sets.append(charset)

        name = get_name(fn)
        names.append(name)

        lang = get_lang(name)
        langs.append(lang)
        if '-' in lang:
            country.append(orth_entries[fn]) # maps to original index
            language_family = lang.split('-')[0]
            LangCountrySets.setdefault(language_family, []).append(orth_entries[fn])

        total_leaves += len(charset.leaves)

    # Find unique leaves
    leaves = []
    for s in sets:
        for leaf_num in sorted(s.leaves.keys()):
            leaf = s.leaves[leaf_num]
            if not any(leaves_equal(leaf, existing_leaf) for existing_leaf in leaves):
                leaves.append(leaf)

    # Find duplicate charsets: duplicate[i] is the index of the first earlier
    # set equal to set i, or None when there is none.
    duplicate = []
    for i, s in enumerate(sets):
        dup_num = None
        for j in range(i):
            if sets[j].equals(s):
                dup_num = j
                break

        duplicate.append(dup_num)

    # Offset of each non-duplicate set into the leaf_offsets/numbers arrays.
    # Compare against None explicitly: index 0 is a valid duplicate target.
    tn = 0
    off = {}
    for i, s in enumerate(sets):
        if duplicate[i] is not None:
            continue
        off[i] = tn
        tn += len(s.leaves)

    # Scan the input until the marker is found
    # FIXME: this is a bit silly really, might just as well hardcode
    #        the license header in the script and drop the template
    for line in tmpl_file:
        if line.strip() == '@@@':
            break
        print(line, end='')

    print('/* total size: {} unique leaves: {} */\n'.format(total_leaves, len(leaves)))

    print('#define LEAF0       ({} * sizeof (FcLangCharSet))'.format(len(sets)))
    print('#define OFF0        (LEAF0 + {} * sizeof (FcCharLeaf))'.format(len(leaves)))
    print('#define NUM0        (OFF0 + {} * sizeof (uintptr_t))'.format(tn))
    print('#define SET(n)      (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))')
    print('#define OFF(s,o)    (OFF0 + o * sizeof (uintptr_t) - SET(s))')
    print('#define NUM(s,n)    (NUM0 + n * sizeof (FcChar16) - SET(s))')
    print('#define LEAF(o,l)   (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))')
    print('#define fcLangCharSets (fcLangData.langCharSets)')
    print('#define fcLangCharSetIndices (fcLangData.langIndices)')
    print('#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)')

    assert len(sets) < 65536 # FIXME: need to change index type to 32-bit below then

    print('''
static const struct {{
    FcLangCharSet  langCharSets[{}];
    FcCharLeaf     leaves[{}];
    uintptr_t      leaf_offsets[{}];
    FcChar16       numbers[{}];
    {}       langIndices[{}];
    {}       langIndicesInv[{}];
}} fcLangData = {{'''.format(len(sets), len(leaves), tn, tn,
                             'FcChar16 ', len(sets), 'FcChar16 ', len(sets)))

    # Dump sets
    print('{')
    for i, s in enumerate(sets):
        # A duplicate set reuses the leaves of the set it duplicates.
        j = i if duplicate[i] is None else duplicate[i]
        print('    {{ "{}",  {{ FC_REF_CONSTANT, {}, OFF({},{}), NUM({},{}) }} }}, /* {} */'.format(
            langs[i], len(sets[j].leaves), i, off[j], i, off[j], i))

    print('},')

    # Dump leaves
    print('{')
    for l, leaf in enumerate(leaves):
        print('    {{ {{ /* {} */'.format(l), end='')
        for i in range(0, 8): # 256/32 = 8
            if i % 4 == 0:
                print('\n   ', end='')
            print(' 0x{:08x},'.format(leaf[i]), end='')
        print('\n    } },')
    print('},')

    # Dump leaf offsets
    print('{')
    for i, s in enumerate(sets):
        if duplicate[i] is not None:
            continue

        print('    /* {} */'.format(names[i]))

        for n, leaf_num in enumerate(sorted(s.leaves.keys())):
            leaf = s.leaves[leaf_num]
            if n % 4 == 0:
                print('   ', end='')
            found = [k for k, unique_leaf in enumerate(leaves) if leaves_equal(unique_leaf, leaf)]
            assert found, "Couldn't find leaf in unique leaves list!"
            assert len(found) == 1
            print(' LEAF({:3},{:3}),'.format(off[i], found[0]), end='')
            if n % 4 == 3:
                print('')
        if len(s.leaves) % 4 != 0:
            print('')

    print('},')

    # Dump leaf numbers
    print('{')
    for i, s in enumerate(sets):
        if duplicate[i] is not None:
            continue

        print('    /* {} */'.format(names[i]))

        for n, leaf_num in enumerate(sorted(s.leaves.keys())):
            if n % 8 == 0:
                print('   ', end='')
            print(' 0x{:04x},'.format(leaf_num), end='')
            if n % 8 == 7:
                print('')
        if len(s.leaves) % 8 != 0:
            print('')

    print('},')

    # langIndices: sorted position -> command-line position
    print('{')
    for fn, name in zip(sorted_files, names):
        print('    {}, /* {} */'.format(orth_entries[fn], name))
    print('},')

    # langIndicesInv: command-line position -> sorted position
    print('{')
    for fn in orth_entries:
        print('    {}, /* {} */'.format(sorted_index[fn], get_name(fn)))
    print('}')

    print('};\n')

    print('#define NUM_LANG_CHAR_SET\t{}'.format(len(sets)))
    num_lang_set_map = (len(sets) + 31) // 32
    print('#define NUM_LANG_SET_MAP\t{}'.format(num_lang_set_map))

    # Dump indices with country codes
    assert len(country) > 0
    assert len(LangCountrySets) > 0
    print('')
    print('static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {')
    for k in sorted(LangCountrySets.keys()):
        langset_map = [0] * num_lang_set_map # initialise all zeros
        for entries_id in LangCountrySets[k]:
            langset_map[entries_id >> 5] |= (1 << (entries_id & 0x1f))
        print('    {', end='')
        for v in langset_map:
            print(' 0x{:08x},'.format(v), end='')
        print(' }}, /* {} */'.format(k))

    print('};\n')
    print('#define NUM_COUNTRY_SET {}\n'.format(len(LangCountrySets)))

    # Find ranges for each letter for faster searching
    # Dump sets start/finish for the fastpath
    print('static const FcLangCharSetRange  fcLangCharSetRanges[] = {\n')
    for c in string.ascii_lowercase: # a-z
        # 9999 / -1 are printed unchanged when no name starts with the letter
        start = 9999
        stop = -1
        for i, name in enumerate(names):
            if name.startswith(c):
                start = min(start, i)
                stop = max(stop, i)
        print('    {{ {}, {} }}, /* {} */'.format(start, stop, c))
    print('};\n')

    # And flush out the rest of the input file
    for line in tmpl_file:
        print(line, end='')

    sys.stdout.flush()
Add Meson build system See https://mesonbuild.com 2020-07-31 09:26:11 +02:00			`#!/usr/bin/env python3`
			`#`
			`# fontconfig/fc-lang/fc-lang.py`
			`#`
			`# Copyright © 2001-2002 Keith Packard`
			`# Copyright © 2019 Tim-Philipp Müller`
			`#`
			`# Permission to use, copy, modify, distribute, and sell this software and its`
			`# documentation for any purpose is hereby granted without fee, provided that`
			`# the above copyright notice appear in all copies and that both that`
			`# copyright notice and this permission notice appear in supporting`
			`# documentation, and that the name of the author(s) not be used in`
			`# advertising or publicity pertaining to distribution of the software without`
			`# specific, written prior permission. The authors make no`
			`# representations about the suitability of this software for any purpose. It`
			`# is provided "as is" without express or implied warranty.`
			`#`
			`# THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,`
			`# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO`
			`# EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR`
			`# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,`
			`# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER`
			`# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR`
			`# PERFORMANCE OF THIS SOFTWARE.`

			`# fc-lang`
			`#`
			`# Read a set of language orthographies and build C declarations for`
			`# charsets which can then be used to identify which languages are`
			`# supported by a given font.`
			`#`
			`# TODO: this code is not very pythonic, a lot of it is a 1:1 translation`
			`# of the C code and we could probably simplify it a bit`
			`import argparse`
			`import string`
			`import sys`
			`import os`

			`# we just store the leaves in a dict, we can order the leaves later if needed`
			`class CharSet:`
			`def __init__(self):`
			`self.leaves = {} # leaf_number -> leaf data (= 16 uint32)`

			`def add_char(self, ucs4):`
			`assert ucs4 < 0x01000000`
			`leaf_num = ucs4 >> 8`
			`if leaf_num in self.leaves:`
			`leaf = self.leaves[leaf_num]`
			`else:`
			`leaf = [0, 0, 0, 0, 0, 0, 0, 0] # 256/32 = 8`
			`self.leaves[leaf_num] = leaf`
			`leaf[(ucs4 & 0xff) >> 5] \|= (1 << (ucs4 & 0x1f))`
			`#print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))`

			`def del_char(self, ucs4):`
			`assert ucs4 < 0x01000000`
			`leaf_num = ucs4 >> 8`
			`if leaf_num in self.leaves:`
			`leaf = self.leaves[leaf_num]`
			`leaf[(ucs4 & 0xff) >> 5] &= ~(1 << (ucs4 & 0x1f))`
			`# We don't bother removing the leaf if it's empty */`
			`#print('{:08x} [{:04x}] --> {}'.format(ucs4, ucs4>>8, leaf))`

			`def equals(self, other_cs):`
			`keys = sorted(self.leaves.keys())`
			`other_keys = sorted(other_cs.leaves.keys())`
			`if len(keys) != len(other_keys):`
			`return False`
			`for k1, k2 in zip(keys, other_keys):`
			`if k1 != k2:`
			`return False`
			`if not leaves_equal(self.leaves[k1], other_cs.leaves[k2]):`
			`return False`
			`return True`

			`# Convert a file name into a name suitable for C declarations`
			`def get_name(file_name):`
			`return file_name.split('.')[0]`

			`# Convert a C name into a language name`
			`def get_lang(c_name):`
			`return c_name.replace('_', '-').replace(' ', '').lower()`

			`def read_orth_file(file_name):`
			`lines = []`
			`with open(file_name, 'r', encoding='utf-8') as orth_file:`
			`for num, line in enumerate(orth_file):`
			`if line.startswith('include '):`
			`include_fn = line[8:].strip()`
			`lines += read_orth_file(include_fn)`
			`else:`
			`# remove comments and strip whitespaces`
			`line = line.split('#')[0].strip()`
			`line = line.split('\t')[0].strip()`
			`# skip empty lines`
			`if line:`
			`lines += [(file_name, num, line)]`

			`return lines`

			`def leaves_equal(leaf1, leaf2):`
			`for v1, v2 in zip(leaf1, leaf2):`
			`if v1 != v2:`
			`return False`
			`return True`

			`# Build a single charset from a source file`
			`#`
			`# The file format is quite simple, either`
			`# a single hex value or a pair separated with a dash`
			`def parse_orth_file(file_name, lines):`
			`charset = CharSet()`
			`for fn, num, line in lines:`
			`delete_char = line.startswith('-')`
			`if delete_char:`
			`line = line[1:]`
			`if line.find('-') != -1:`
			`parts = line.split('-')`
			`elif line.find('..') != -1:`
			`parts = line.split('..')`
			`else:`
			`parts = [line]`

			`start = int(parts.pop(0), 16)`
			`end = start`
			`if parts:`
			`end = int(parts.pop(0), 16)`
			`if parts:`
			`print('ERROR: {} line {}: parse error (too many parts)'.format(fn, num))`

			`for ucs4 in range(start, end+1):`
			`if delete_char:`
			`charset.del_char(ucs4)`
			`else:`
			`charset.add_char(ucs4)`

			`assert charset.equals(charset) # sanity check for the equals function`

			`return charset`

			`if __name__=='__main__':`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument('orth_files', nargs='+', help='List of .orth files')`
			`parser.add_argument('--directory', dest='directory', default=None)`
			`parser.add_argument('--template', dest='template_file', default=None)`
			`parser.add_argument('--output', dest='output_file', default=None)`

			`args = parser.parse_args()`

			`sets = []`
			`names = []`
			`langs = []`
			`country = []`

			`total_leaves = 0`

			`LangCountrySets = {}`

			`# Open output file`
			`if args.output_file:`
			`sys.stdout = open(args.output_file, 'w', encoding='utf-8')`

			`# Read the template file`
			`if args.template_file:`
			`tmpl_file = open(args.template_file, 'r', encoding='utf-8')`
			`else:`
			`tmpl_file = sys.stdin`

			`# Change into source dir if specified (after opening other files)`
			`if args.directory:`
			`os.chdir(args.directory)`

			`orth_entries = {}`
			`for i, fn in enumerate(args.orth_files):`
			`orth_entries[fn] = i`

			`for fn in sorted(orth_entries.keys()):`
			`lines = read_orth_file(fn)`
			`charset = parse_orth_file(fn, lines)`

			`sets.append(charset)`

			`name = get_name(fn)`
			`names.append(name)`

			`lang = get_lang(name)`
			`langs.append(lang)`
			`if lang.find('-') != -1:`
			`country.append(orth_entries[fn]) # maps to original index`
			`language_family = lang.split('-')[0]`
			`if not language_family in LangCountrySets:`
			`LangCountrySets[language_family] = []`
			`LangCountrySets[language_family] += [orth_entries[fn]]`

			`total_leaves += len(charset.leaves)`

			`# Find unique leaves`
			`leaves = []`
			`for s in sets:`
			`for leaf_num in sorted(s.leaves.keys()):`
			`leaf = s.leaves[leaf_num]`
			`is_unique = True`
			`for existing_leaf in leaves:`
			`if leaves_equal(leaf, existing_leaf):`
			`is_unique = False`
			`break`
			`#print('unique: ', is_unique)`
			`if is_unique:`
			`leaves.append(leaf)`

			`# Find duplicate charsets`
			`duplicate = []`
			`for i, s in enumerate(sets):`
			`dup_num = None`
			`if i >= 1:`
			`for j, s_cmp in enumerate(sets):`
			`if j >= i:`
			`break`
			`if s_cmp.equals(s):`
			`dup_num = j`
			`break`

			`duplicate.append(dup_num)`

			`tn = 0`
			`off = {}`
			`for i, s in enumerate(sets):`
			`if duplicate[i]:`
			`continue`
			`off[i] = tn`
			`tn += len(s.leaves)`

			`# Scan the input until the marker is found`
			`# FIXME: this is a bit silly really, might just as well hardcode`
			`# the license header in the script and drop the template`
			`for line in tmpl_file:`
			`if line.strip() == '@@@':`
			`break`
			`print(line, end='')`

			`print('/* total size: {} unique leaves: {} */\n'.format(total_leaves, len(leaves)))`

			`print('#define LEAF0 ({} * sizeof (FcLangCharSet))'.format(len(sets)))`
			`print('#define OFF0 (LEAF0 + {} * sizeof (FcCharLeaf))'.format(len(leaves)))`
			`print('#define NUM0 (OFF0 + {} * sizeof (uintptr_t))'.format(tn))`
			`print('#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))')`
			`print('#define OFF(s,o) (OFF0 + o * sizeof (uintptr_t) - SET(s))')`
			`print('#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))')`
			`print('#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))')`
			`print('#define fcLangCharSets (fcLangData.langCharSets)')`
			`print('#define fcLangCharSetIndices (fcLangData.langIndices)')`
			`print('#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)')`

Change index type to 16 bit and bump cache version to 9 The number of .orth files is now > 256, therefore the index type needs to be 16 bit. 2022-12-05 11:07:59 +01:00			`assert len(sets) < 65536 # FIXME: need to change index type to 32-bit below then`
Add Meson build system See https://mesonbuild.com 2020-07-31 09:26:11 +02:00
			`print('''`
			`static const struct {{`
			`FcLangCharSet langCharSets[{}];`
			`FcCharLeaf leaves[{}];`
			`uintptr_t leaf_offsets[{}];`
			`FcChar16 numbers[{}];`
			`{} langIndices[{}];`
			`{} langIndicesInv[{}];`
			`}} fcLangData = {{'''.format(len(sets), len(leaves), tn, tn,`
Change index type to 16 bit and bump cache version to 9 The number of .orth files is now > 256, therefore the index type needs to be 16 bit. 2022-12-05 11:07:59 +01:00			`'FcChar16 ', len(sets), 'FcChar16 ', len(sets)))`
Add Meson build system See https://mesonbuild.com 2020-07-31 09:26:11 +02:00
			`# Dump sets`
			`print('{')`
			`for i, s in enumerate(sets):`
			`if duplicate[i]:`
			`j = duplicate[i]`
			`else:`
			`j = i`
			`print(' {{ "{}", {{ FC_REF_CONSTANT, {}, OFF({},{}), NUM({},{}) }} }}, /* {} */'.format(`
			`langs[i], len(sets[j].leaves), i, off[j], i, off[j], i))`

			`print('},')`

			`# Dump leaves`
			`print('{')`
			`for l, leaf in enumerate(leaves):`
			`print(' {{ {{ /* {} */'.format(l), end='')`
			`for i in range(0, 8): # 256/32 = 8`
			`if i % 4 == 0:`
			`print('\n ', end='')`
			`print(' 0x{:08x},'.format(leaf[i]), end='')`
			`print('\n } },')`
			`print('},')`

			`# Dump leaves`
			`print('{')`
			`for i, s in enumerate(sets):`
			`if duplicate[i]:`
			`continue`

			`print(' /* {} */'.format(names[i]))`

			`for n, leaf_num in enumerate(sorted(s.leaves.keys())):`
			`leaf = s.leaves[leaf_num]`
			`if n % 4 == 0:`
			`print(' ', end='')`
			`found = [k for k, unique_leaf in enumerate(leaves) if leaves_equal(unique_leaf,leaf)]`
			`assert found, "Couldn't find leaf in unique leaves list!"`
			`assert len(found) == 1`
			`print(' LEAF({:3},{:3}),'.format(off[i], found[0]), end='')`
			`if n % 4 == 3:`
			`print('')`
			`if len(s.leaves) % 4 != 0:`
			`print('')`

			`print('},')`

			`print('{')`
			`for i, s in enumerate(sets):`
			`if duplicate[i]:`
			`continue`

			`print(' /* {} */'.format(names[i]))`

			`for n, leaf_num in enumerate(sorted(s.leaves.keys())):`
			`leaf = s.leaves[leaf_num]`
			`if n % 8 == 0:`
			`print(' ', end='')`
			`print(' 0x{:04x},'.format(leaf_num), end='')`
			`if n % 8 == 7:`
			`print('')`
			`if len(s.leaves) % 8 != 0:`
			`print('')`

			`print('},')`

			`# langIndices`
			`print('{')`
			`for i, s in enumerate(sets):`
			`fn = '{}.orth'.format(names[i])`
			`print(' {}, /* {} */'.format(orth_entries[fn], names[i]))`
			`print('},')`

			`# langIndicesInv`
			`print('{')`
			`for i, k in enumerate(orth_entries.keys()):`
			`name = get_name(k)`
			`idx = names.index(name)`
			`print(' {}, /* {} */'.format(idx, name))`
			`print('}')`

			`print('};\n')`

			`print('#define NUM_LANG_CHAR_SET {}'.format(len(sets)))`
			`num_lang_set_map = (len(sets) + 31) // 32;`
			`print('#define NUM_LANG_SET_MAP {}'.format(num_lang_set_map))`

			`# Dump indices with country codes`
			`assert len(country) > 0`
			`assert len(LangCountrySets) > 0`
			`print('')`
			`print('static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {')`
			`for k in sorted(LangCountrySets.keys()):`
			`langset_map = [0] * num_lang_set_map # initialise all zeros`
			`for entries_id in LangCountrySets[k]:`
			`langset_map[entries_id >> 5] \|= (1 << (entries_id & 0x1f))`
			`print(' {', end='')`
			`for v in langset_map:`
			`print(' 0x{:08x},'.format(v), end='')`
			`print(' }}, /* {} */'.format(k))`

			`print('};\n')`
			`print('#define NUM_COUNTRY_SET {}\n'.format(len(LangCountrySets)))`

			`# Find ranges for each letter for faster searching`
			`# Dump sets start/finish for the fastpath`
			`print('static const FcLangCharSetRange fcLangCharSetRanges[] = {\n')`
			`for c in string.ascii_lowercase: # a-z`
			`start = 9999`
			`stop = -1`
			`for i, s in enumerate(sets):`
			`if names[i].startswith(c):`
			`start = min(start,i)`
			`stop = max(stop,i)`
			`print(' {{ {}, {} }}, /* {} */'.format(start, stop, c))`
			`print('};\n')`

			`# And flush out the rest of the input file`
			`for line in tmpl_file:`
			`print(line, end='')`

			`sys.stdout.flush()`