Add fc-lang program to generate language coverage tables

2002-07-06 23:21:37 +00:00 · 2002-07-06 23:21:37 +00:00 · c1382a3d99
parent 084407063d
commit c1382a3d99
68 changed files with 44053 additions and 0 deletions
--- a/fc-lang/Imakefile
+++ b/fc-lang/Imakefile
@ -0,0 +1,44 @@
+#ifdef UseInstalled
+/* building outside the tree, use private defines */
+#include "../local.def"
+#endif
+
+#
+# Include directories (-I flags) for the FreeType, libxml2 and fontconfig headers
+#
+INCLUDES=$(FREETYPE2INCLUDES) $(LIBXML2INCLUDES) -I.. -I../src
+
+LOCAL_LIBRARIES=FontconfigClientLibs
+        DEPLIBS=FontconfigClientDepLibs
+
+SRCS=fc-lang.c
+OBJS=fc-lang.o
+
+#
+# TARG is the generated C file, TMPL is the template that fc-lang reads
+# on standard input while producing it
+#
+TARG=fclang.c
+TMPL=fclang.tmpl.c
+
+#
+# Basic ISO 639-1 two letter language names
+ORTH1=ab.orth ar.orth az.orth ba.orth be.orth bg.orth br.orth ca.orth co.orth\
+      cs.orth da.orth de.orth el.orth en.orth eo.orth es.orth et.orth eu.orth\
+      fi.orth fo.orth fr.orth fy.orth ga.orth gd.orth gl.orth he.orth hr.orth\
+      hu.orth hy.orth is.orth it.orth ja.orth ka.orth kk.orth kl.orth ko.orth\
+      la.orth lt.orth lv.orth mk.orth mo.orth mt.orth nl.orth no.orth oc.orth\
+      pl.orth pt.orth rm.orth ro.orth ru.orth sh.orth sk.orth sl.orth sq.orth\
+      sr.orth sv.orth th.orth tr.orth uk.orth vo.orth yi.orth zh_cn.orth\
+      zh_tw.orth
+
+#
+# ISO 639-2 adds many more three letter language names
+#
+ORTH2=chr.orth
+
+#
+# Every orthography file; passed to fc-lang in this order and listed as
+# prerequisites of the generated file
+#
+ORTH=$(ORTH1) $(ORTH2)
+
+all:: $(TARG)
+
+clean::
+	$(RM) $(TARG) $(TARG).tmp
+
+#
+# Generate into a temporary file and rename it only after fc-lang has
+# succeeded, so a failed or interrupted run never leaves a truncated
+# generated file behind that looks up to date
+#
+$(TARG): fc-lang $(ORTH) $(TMPL)
+	$(RM) $(TARG).tmp
+	./fc-lang $(ORTH) < $(TMPL) > $(TARG).tmp
+	$(MV) $(TARG).tmp $(TARG)
+
+ComplexProgramTarget(fc-lang)
+LinkBuildBinary(ProgramTargetName(fc-lang))
--- a/fc-lang/ab.orth
+++ b/fc-lang/ab.orth
@ -0,0 +1,17 @@
+# Abkhazia (AB)
+00ab
+00bb
+0401
+040f
+0410-044f
+0451
+045f
+049e-049f
+04a6-04a9
+04ac-04ad
+04b2-04b7
+04bc-04bf
+04d8
+04d9
+04e0-04e1
+#2039-203a	# angle quotes
--- a/fc-lang/ar.orth
+++ b/fc-lang/ar.orth
@ -0,0 +1,49 @@
+# Arabic (AR)
+060C	# ARABIC COMMA
+061B	# ARABIC SEMICOLON
+061F	# ARABIC QUESTION MARK
+0621	# ARABIC LETTER HAMZA
+0622	# ARABIC LETTER ALEF WITH MADDA ABOVE
+0623	# ARABIC LETTER ALEF WITH HAMZA ABOVE
+0624	# ARABIC LETTER WAW WITH HAMZA ABOVE
+0625	# ARABIC LETTER ALEF WITH HAMZA BELOW
+0626	# ARABIC LETTER YEH WITH HAMZA ABOVE
+0627	# ARABIC LETTER ALEF
+0628	# ARABIC LETTER BEH
+0629	# ARABIC LETTER TEH MARBUTA
+062A	# ARABIC LETTER TEH
+062B	# ARABIC LETTER THEH
+062C	# ARABIC LETTER JEEM
+062D	# ARABIC LETTER HAH
+062E	# ARABIC LETTER KHAH
+062F	# ARABIC LETTER DAL
+0630	# ARABIC LETTER THAL
+0631	# ARABIC LETTER REH
+0632	# ARABIC LETTER ZAIN
+0633	# ARABIC LETTER SEEN
+0634	# ARABIC LETTER SHEEN
+0635	# ARABIC LETTER SAD
+0636	# ARABIC LETTER DAD
+0637	# ARABIC LETTER TAH
+0638	# ARABIC LETTER ZAH
+0639	# ARABIC LETTER AIN
+063A	# ARABIC LETTER GHAIN
+0640	# ARABIC TATWEEL
+0641	# ARABIC LETTER FEH
+0642	# ARABIC LETTER QAF
+0643	# ARABIC LETTER KAF
+0644	# ARABIC LETTER LAM
+0645	# ARABIC LETTER MEEM
+0646	# ARABIC LETTER NOON
+0647	# ARABIC LETTER HEH
+0648	# ARABIC LETTER WAW
+0649	# ARABIC LETTER ALEF MAKSURA
+064A	# ARABIC LETTER YEH
+064B	# ARABIC FATHATAN
+064C	# ARABIC DAMMATAN
+064D	# ARABIC KASRATAN
+064E	# ARABIC FATHA
+064F	# ARABIC DAMMA
+0650	# ARABIC KASRA
+0651	# ARABIC SHADDA
+0652	# ARABIC SUKUN
--- a/fc-lang/az.orth
+++ b/fc-lang/az.orth
@ -0,0 +1,26 @@
+# Azerbaijani (AZ)
+0040-005a
+0060-007a
+00c4
+00c7
+00d6
+00dc
+00e4
+00e7
+00f6
+00fc
+011e-011f
+0130-0131
+015e-015f
+018f
+0259
+02bc
+0408
+0410-044f
+0458
+0492-0493
+049c-049d
+04ae-04af
+04b8-04bb
+04d8-04d9
+04e8-04e9
--- a/fc-lang/ba.orth
+++ b/fc-lang/ba.orth
@ -0,0 +1,16 @@
+# Bashkir (BA)
+00ab
+00bb
+0401
+0410-044f
+0451
+0492-0493
+0498-0499
+04a0-04a3
+04aa-04ab
+04ae-04af
+04d8-04d9
+04e8-04e9
+#2018-2019	# single quotes
+#201c-201d	# double quotes
+#2039-203a	# angle quotes
--- a/fc-lang/be.orth
+++ b/fc-lang/be.orth
@ -0,0 +1,11 @@
+# Byelorussian (BE)
+00ab
+00bb
+0401
+0406
+040e
+0410-044f
+0451
+0456
+045e
+#2039-203a	# angle quotes
--- a/fc-lang/bg.orth
+++ b/fc-lang/bg.orth
@ -0,0 +1,12 @@
+# Bulgarian (BG)
+0400
+040d
+0410-042a
+042c
+042e-044a
+044c
+044e-044f
+0450
+045d
+0462-0463
+046a-046b
--- a/fc-lang/br.orth
+++ b/fc-lang/br.orth
@ -0,0 +1,17 @@
+# Breton (BR)
+0027
+0040-005a
+0060-007a
+00ab
+00bb
+00c2
+00ca
+00d1
+00d9
+00dc
+00e2
+00ea
+00f1
+00f9
+00fc
+#2019-201a	# single quote and comma
--- a/fc-lang/ca.orth
+++ b/fc-lang/ca.orth
@ -0,0 +1,23 @@
+# Catalan (CA)
+0040-005a
+0060-007a
+00b7
+00c0
+00c7
+00c8-00c9
+00cd
+00cf
+00d2-00d3
+00da
+00dc
+00e0
+00e7
+00e8-00e9
+00ed
+00ef
+00f2-00f3
+00fa
+00fc
+013f-0140
+#2018-2019	# single quotes
+#201c-201d	# double quotes
--- a/fc-lang/chr.orth
+++ b/fc-lang/chr.orth
@ -0,0 +1,2 @@
+# Cherokee (chr)
+13a0-13f4
--- a/fc-lang/co.orth
+++ b/fc-lang/co.orth
@ -0,0 +1,2 @@
+# Corsican (CO)
+include fr.orth
--- a/fc-lang/cs.orth
+++ b/fc-lang/cs.orth
@ -0,0 +1,27 @@
+# Czech (CS)
+0040-005a
+0060-007a
+00c1
+00c4
+00c9
+00cd
+00d3
+00d6
+00da
+00dc-00dd
+00e1
+00e4
+00e9
+00ed
+00f3
+00f6
+00fa
+00fc-00fd
+010c-010f
+011a-011b
+0147-0148
+0158-0159
+0160-0161
+0164-0165
+016e-016f
+017d-017e
--- a/fc-lang/da.orth
+++ b/fc-lang/da.orth
@ -0,0 +1,27 @@
+# Danish (DA)
+0040-005a
+0060-007a
+00ab
+00bb
+00c0-00c2
+00c4-00cb
+00cd
+00d0
+00d3-00d4
+00d6
+00d8
+00da
+00dc-00de
+00e0-00e2
+00e4-00eb
+00ed
+00f0
+00f3-00f4
+00f6
+00f8
+00fa
+00fc-00fe
+0152-0153
+01fa-01ff
+#2039-203a	# angle quotes
+
--- a/fc-lang/de.orth
+++ b/fc-lang/de.orth
@ -0,0 +1,17 @@
+# German (DE)
+00ab
+00bb
+0040-005a
+0060-007a
+00c4
+00d6
+00dc
+00df
+00e4
+00f6
+00fc
+#2018	# single quotes
+#201a	# single quotes
+#201c	# double quotes
+#201e	# double quotes
+#2039-203a	# angle quotes
--- a/fc-lang/el.orth
+++ b/fc-lang/el.orth
@ -0,0 +1,26 @@
+# Greek (EL)
+0374-0375
+037a
+037e
+0384-038a
+038c
+038e-03a1
+03a3-03ce
+03d7
+03da-03e1
+#1f00-1f15	# only for polytonic orthography below...
+#1f18-1f1d
+#1f20-1f45
+#1f48-1f4d
+#1f50-1f57
+#1f59
+#1f5b
+#1f5d
+#1f5f-1f7d
+#1f80-1fb4
+#1fb6-1fc4
+#1fc6-1fd3
+#1fd6-1fdb
+#1fdd-1fef
+#1ff2-1ff4
+#1ff6-1ffe
--- a/fc-lang/en.orth
+++ b/fc-lang/en.orth
@ -0,0 +1,17 @@
+# English (EN)
+0040-005a
+0060-007a
+00c0
+00c7-00cb
+00cf
+00d1
+00d4
+00d6
+00e0
+00e7-00eb
+00ef
+00f1
+00f4
+00f6
+#2018-2019	# single quotes
+#201c-201d	# double quotes
--- a/fc-lang/eo.orth
+++ b/fc-lang/eo.orth
@ -0,0 +1,9 @@
+# Esperanto (EO)
+0040-005a
+0060-007a
+0108-0109
+011c-011d
+0124-0125
+0134-0135
+015c-015d
+016c-016d
--- a/fc-lang/es.orth
+++ b/fc-lang/es.orth
@ -0,0 +1,19 @@
+# Spanish (ES)
+0040-005a
+0060-007a
+00a1
+00bf
+00c1
+00c9
+00cd
+00d1
+00d3
+00da
+00dc
+00e1
+00e9
+00ed
+00f1
+00f3
+00fa
+00fc
--- a/fc-lang/et.orth
+++ b/fc-lang/et.orth
@ -0,0 +1,15 @@
+# Estonian (ET)
+0040-005a
+0060-007a
+00c4
+00d5-00d6
+00dc
+00e4
+00f5-00f6
+00fc
+0160-0161
+017d-017e
+#2018	# single quote
+#201a	# single quote
+#201c	# double quote
+#201e	# double quote
--- a/fc-lang/eu.orth
+++ b/fc-lang/eu.orth
@ -0,0 +1,8 @@
+# Basque (EU)
+0040-005a
+0060-007a
+00d1
+00dc
+00f1
+00fc
+0154-0155
--- a/fc-lang/fc-lang.c
+++ b/fc-lang/fc-lang.c
@ -0,0 +1,295 @@
+/*
+ * $XFree86$
+ *
+ * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "fcint.h"
+
+/*
+ * fc-lang
+ *
+ * Read a set of language orthographies and build C declarations for
+ * charsets which can then be used to identify which languages are
+ * supported by a given font.  Note that it would be nice if
+ * this could be done while compiling the library, but this
+ * code uses a number of routines from the library.  It's
+ * expedient to just ship the pre-built version along with the
+ * source orthographies.
+ */
+
+/*
+ * Report an unrecoverable error on stderr as "file:lineno: msg" and
+ * terminate the program with exit status 1.  This function does not
+ * return.
+ *
+ * file   - name of the file the error relates to
+ * lineno - line number within file, or 0 when no line applies
+ * msg    - human readable description of the problem
+ *
+ * The string parameters are const because callers pass string literals
+ * and the result of strerror(); nothing here modifies them.
+ */
+static void 
+fatal (const char *file, int lineno, const char *msg)
+{
+    fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
+    exit (1);
+}
+
+/*
+ * Read the next meaningful line of f into line and return it, or
+ * return 0 at end of file.
+ *
+ * Everything from the first '#' on is cut off as a comment.  Lines that
+ * are then empty, hold only a line ending or blanks, or start with the
+ * DOS end-of-file marker (^Z, octal 032) are skipped.  *lineno is
+ * incremented for every physical line read, skipped ones included, so
+ * it always names the line that is returned.
+ *
+ * line must have room for at least 1024 bytes.
+ */
+static char *
+get_line (FILE *f, char *line, int *lineno)
+{
+    char    *hash;
+
+    /*
+     * Loop instead of recursing once per skipped line, so a long run
+     * of comments cannot exhaust the stack
+     */
+    while (fgets (line, 1024, f))
+    {
+	++(*lineno);
+	hash = strchr (line, '#');
+	if (hash)
+	    *hash = '\0';
+	if (line[0] == '\0' || line[0] == '\n' || line[0] == '\032' || line[0] == '\r')
+	    continue;
+	/* nothing but blanks left, e.g. an indented comment */
+	if (line[strspn (line, " \t\r\n")] == '\0')
+	    continue;
+	return line;
+    }
+    return 0;
+}
+
+/*
+ * build a single charset from a source file
+ *
+ * The file format is quite simple, either
+ * a single hex value or a pair separated with a dash
+ *
+ * A line of the form "include <file>" makes the charset of <file> the
+ * result for this file; scanning stops there and anything collected
+ * before the include is discarded
+ *
+ * Comments begin with '#'
+ *
+ * f is the open orthography file, file its name for error messages.
+ * Returns the frozen charset.  Any parse, range, open or allocation
+ * problem is reported through fatal(), which terminates the program.
+ */
+
+static FcCharSet *
+scan (FILE *f, char *file)
+{
+    FcCharSet	    *c = 0;
+    FcCharSet	    *n;
+    unsigned int    start, end, ucs4;	/* unsigned, as %x requires */
+    char	    line[1024];
+    int		    lineno = 0;
+
+    while (get_line (f, line, &lineno))
+    {
+	if (!strncmp (line, "include", 7))
+	{
+	    char    *inc = line + 7;
+	    size_t  len;
+	    FILE    *inc_f;
+
+	    /* a separator must follow, so a bare "include" is rejected */
+	    if (*inc != ' ' && *inc != '\t')
+		fatal (file, lineno, "parse error");
+	    inc += strspn (inc, " \t");
+	    /* trim the line ending and any trailing blanks from the name */
+	    len = strlen (inc);
+	    while (len > 0 && strchr (" \t\r\n", inc[len-1]))
+		len--;
+	    inc[len] = '\0';
+	    if (len == 0)
+		fatal (file, lineno, "parse error");
+	    inc_f = fopen (inc, "r");
+	    if (!inc_f)
+		fatal (inc, 0, "can't open");
+	    /* the included set replaces whatever was collected so far */
+	    if (c)
+		FcCharSetDestroy (c);
+	    c = scan (inc_f, inc);
+	    fclose (inc_f);
+	    return c;
+	}
+	if (strchr (line, '-'))
+	{
+	    if (sscanf (line, "%x-%x", &start, &end) != 2)
+		fatal (file, lineno, "parse error");
+	}
+	else
+	{
+	    if (sscanf (line, "%x", &start) != 1)
+		fatal (file, lineno, "parse error");
+	    end = start;
+	}
+	/*
+	 * A reversed range would silently add nothing and usually means
+	 * a typo in the second value; values beyond U+10FFFF are not
+	 * Unicode and would make the loop below run for a very long time
+	 */
+	if (start > end || end > 0x10ffff)
+	    fatal (file, lineno, "invalid range");
+	if (!c)
+	    c = FcCharSetCreate ();
+	if (!c)
+	    fatal (file, lineno, "out of memory");
+	for (ucs4 = start; ucs4 <= end; ucs4++)
+	{
+	    if (!FcCharSetAddChar (c, ucs4))
+		fatal (file, lineno, "out of memory");
+	}
+    }
+    /* a file without entries still yields a valid, empty charset */
+    if (!c)
+	c = FcCharSetCreate ();
+    if (!c)
+	fatal (file, lineno, "out of memory");
+    n = FcCharSetFreeze (c);
+    if (!n)
+	fatal (file, lineno, "out of memory");
+    FcCharSetDestroy (c);
+    return n;
+}
+
+/*
+ * Convert a file name into a name suitable for C declarations
+ *
+ * Leading directories are dropped and the name ends at the first dot
+ * of the remaining file name (or at its end when there is no dot), so
+ * "zh_cn.orth" and "./fc-lang/zh_cn.orth" both give "zh_cn".
+ *
+ * Returns a newly allocated string owned by the caller.  Running out
+ * of memory is reported in the same "file:line: message" form fatal()
+ * uses and terminates the program.
+ */
+static char *
+get_name (char *file)
+{
+    char    *name;
+    char    *base;
+    size_t  len;
+
+    /* look for the dot in the last path component only */
+    base = strrchr (file, '/');
+    base = base ? base + 1 : file;
+    len = strcspn (base, ".");
+    name = malloc (len + 1);
+    if (!name)
+    {
+	fprintf (stderr, "%s:%d: %s\n", file, 0, "out of memory");
+	exit (1);
+    }
+    memcpy (name, base, len);
+    name[len] = '\0';
+    return name;
+}
+
+/*
+ * Convert a C name into a language name
+ *
+ * Upper case letters are folded to lower case, '_' becomes '-' and
+ * spaces are dropped, so "zh_cn" gives "zh-cn".
+ *
+ * Returns a newly allocated string owned by the caller.  Running out
+ * of memory is reported in the same "file:line: message" form fatal()
+ * uses and terminates the program.
+ */
+static char *
+get_lang (char *name)
+{
+    char    *lang = malloc (strlen (name) + 1);
+    char    *l = lang;
+    char    c;
+
+    if (!lang)
+    {
+	fprintf (stderr, "%s:%d: %s\n", name, 0, "out of memory");
+	exit (1);
+    }
+    while ((c = *name++))
+    {
+	/* the <ctype.h> functions are only defined for unsigned char values */
+	if (isupper ((unsigned char) c))
+	    c = (char) tolower ((unsigned char) c);
+	if (c == '_')
+	    c = '-';
+	if (c == ' ')
+	    continue;
+	*l++ = c;
+    }
+    *l = '\0';
+    return lang;
+}
+
+/*
+ * Usage: fc-lang file.orth ... < template > output
+ *
+ * Scans every orthography file named on the command line, then copies
+ * the template from stdin to stdout.  The template line starting with
+ * "@@@" is replaced by the generated tables: one array holding each
+ * distinct leaf once, per-language leaf and number arrays, and the
+ * fcLangCharSets table that ties them to language names.
+ *
+ * Exits with status 0 on success and 1 when writing stdout failed;
+ * all other problems end the program through fatal().
+ */
+int
+main (int argc, char **argv)
+{
+    FcCharSet	*sets[1024];
+    char	*names[1024];
+    FILE	*f;
+    int		i = 0;
+    FcCharLeaf	**leaves, **sleaves;
+    int		total_leaves = 0;
+    int		l, sl, tl;
+    char	line[1024];
+
+    while (*++argv)
+    {
+	/* the last slot is reserved for the terminating null entry */
+	if (i >= (int) (sizeof (sets) / sizeof (sets[0])) - 1)
+	    fatal (*argv, 0, "too many orthography files");
+	f = fopen (*argv, "r");
+	if (!f)
+	    fatal (*argv, 0, strerror (errno));
+	sets[i] = scan (f, *argv);
+	/* a null set would be dereferenced below and end the table early */
+	if (!sets[i])
+	    fatal (*argv, 0, "out of memory");
+	names[i] = get_name (*argv);
+	total_leaves += sets[i]->num;
+	i++;
+	fclose (f);
+    }
+    sets[i] = 0;
+    /* malloc (0) may return null without being an error, so ask for one slot at least */
+    leaves = malloc ((total_leaves ? total_leaves : 1) * sizeof (FcCharLeaf *));
+    if (!leaves)
+	fatal ("fc-lang", 0, "out of memory");
+    tl = 0;
+    /*
+     * Find unique leaves
+     */
+    for (i = 0; sets[i]; i++)
+    {
+	sleaves = sets[i]->leaves;
+	for (sl = 0; sl < sets[i]->num; sl++)
+	{
+	    for (l = 0; l < tl; l++)
+		if (leaves[l] == sleaves[sl])
+		    break;
+	    if (l == tl)
+		leaves[tl++] = sleaves[sl];
+	}
+    }
+
+    /*
+     * Scan the input until the marker is found
+     */
+
+    while (fgets (line, sizeof (line), stdin))
+    {
+	if (!strncmp (line, "@@@", 3))
+	    break;
+	fputs (line, stdout);
+    }
+
+    printf ("/* total size: %d unique leaves: %d */\n\n",
+	    total_leaves, tl);
+    /*
+     * Dump leaves
+     */
+    printf ("static const FcCharLeaf	leaves[%d] = {\n", tl);
+    for (l = 0; l < tl; l++)
+    {
+	printf ("    { { /* %d */", l);
+	for (i = 0; i < 256/32; i++)
+	{
+	    if (i % 4 == 0)
+		printf ("\n   ");
+	    printf (" 0x%08x,", leaves[l]->map[i]);
+	}
+	printf ("\n    } },\n");
+    }
+    printf ("};\n\n");
+    printf ("#define L(n) ((FcCharLeaf *) &leaves[n])\n\n");
+    /*
+     * Dump arrays
+     */
+    for (i = 0; sets[i]; i++)
+    {
+	int n;
+
+	printf ("static const FcCharLeaf *leaves_%s[%d] = {\n",
+		names[i], sets[i]->num);
+	for (n = 0; n < sets[i]->num; n++)
+	{
+	    if (n % 8 == 0)
+		printf ("   ");
+	    /* refer to the leaf by its index in the shared leaves array */
+	    for (l = 0; l < tl; l++)
+		if (leaves[l] == sets[i]->leaves[n])
+		    break;
+	    if (l == tl)
+		fatal (names[i], 0, "can't find leaf");
+	    printf (" L(%3d),", l);
+	    if (n % 8 == 7)
+		printf ("\n");
+	}
+	if (n % 8 != 0)
+	    printf ("\n");
+	printf ("};\n\n");
+
+
+	printf ("static const FcChar16 numbers_%s[%d] = {\n",
+		names[i], sets[i]->num);
+	for (n = 0; n < sets[i]->num; n++)
+	{
+	    if (n % 8 == 0)
+		printf ("   ");
+	    printf (" 0x%04x,", sets[i]->numbers[n]);
+	    if (n % 8 == 7)
+		printf ("\n");
+	}
+	if (n % 8 != 0)
+	    printf ("\n");
+	printf ("};\n\n");
+    }
+    printf ("#undef L\n\n");
+    /*
+     * Dump sets
+     */
+    printf ("static const FcLangCharSet  fcLangCharSets[] = {\n");
+    for (i = 0; sets[i]; i++)
+    {
+	char	*lang = get_lang (names[i]);
+
+	printf ("    { (FcChar8 *) \"%s\",\n"
+		"      { 1, FcTrue, %d, "
+		"(FcCharLeaf **) leaves_%s, "
+		"(FcChar16 *) numbers_%s } },\n",
+		lang,
+		sets[i]->num, names[i], names[i]);
+	free (lang);
+    }
+    printf ("};\n\n");
+    /*
+     * Copy the remainder of the template
+     */
+    while (fgets (line, sizeof (line), stdin))
+	fputs (line, stdout);
+
+    fflush (stdout);
+    /* ferror only promises a nonzero value on failure; turn it into a real exit status */
+    exit (ferror (stdout) ? 1 : 0);
+}
--- a/fc-lang/fc-lang.man
+++ b/fc-lang/fc-lang.man
@ -0,0 +1,41 @@
+.\"
+.\" Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
+.\"
+.\" Permission to use, copy, modify, distribute, and sell this software and its
+.\" documentation for any purpose is hereby granted without fee, provided that
+.\" the above copyright notice appear in all copies and that both that
+.\" copyright notice and this permission notice appear in supporting
+.\" documentation, and that the name of Keith Packard not be used in
+.\" advertising or publicity pertaining to distribution of the software without
+.\" specific, written prior permission.  Keith Packard makes no
+.\" representations about the suitability of this software for any purpose.  It
+.\" is provided "as is" without express or implied warranty.
+.\"
+.\" KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+.\" INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+.\" EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+.\" CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+.\" DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+.\" PERFORMANCE OF THIS SOFTWARE.
+.\"
+.\"
+.\" $XFree86: xc/programs/fc-lang/fc-lang.man,v 1.3 2001/02/09 03:47:56 tsi Exp $
+.\"
+.TH FC-LANG 1 __vendorversion__
+.SH NAME
+fc-lang, fclang.c \- create a database of language orthographies
+.SH SYNOPSIS
+.B "fc-lang"
+.RI [ language-coverage
+\|.\|.\|. ]
+.SH DESCRIPTION
+.I Fc-lang
+builds the fclang.c file used in the fontconfig library to automatically
+determine language coverage for fonts which don't contain this information.
+.SH FILES
+.TP 15
+.B fclang.tmpl.c
+The template file in which the tables are inserted
+.SH "SEE ALSO"
+fontconfig(3)
--- a/fc-lang/fclang.tmpl.c
+++ b/fc-lang/fclang.tmpl.c
@ -0,0 +1,130 @@
+/*
+ * $XFree86$
+ *
+ * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "fcint.h"
+
+/*
+ * One entry of the language table that fc-lang generates in place of
+ * the marker line below: a language name and the characters a font
+ * has to cover to be tagged with that language.
+ */
+typedef struct {
+    FcChar8	*lang;	    /* language name, e.g. "en" or "zh-cn" */
+    FcCharSet	charset;    /* characters required for that language */
+} FcLangCharSet;
+
+@@@
+
+/* Number of entries in the generated fcLangCharSets table */
+#define NUM_LANG_CHAR_SET	(sizeof (fcLangCharSets) / sizeof (fcLangCharSets[0]))
+						 
+/*
+ * Add an FC_LANG string to pattern for every table language whose
+ * required characters are all present in charset and which the pattern
+ * does not already carry.  With FC_DBG_SCANV debugging enabled, the
+ * number of missing characters is printed for each language.
+ *
+ * Returns FcFalse as soon as adding a string fails, FcTrue otherwise.
+ */
+FcBool
+FcFreeTypeSetLang (FcPattern *pattern, FcCharSet *charset)
+{
+    int		idx;
+    FcChar32	uncovered;
+
+    for (idx = 0; idx < NUM_LANG_CHAR_SET; idx++)
+    {
+	/* how many of the language's characters the font lacks */
+	uncovered = FcCharSetSubtractCount (&fcLangCharSets[idx].charset, charset);
+	if (FcDebug() & FC_DBG_SCANV)
+	    printf ("%s(%d) ", fcLangCharSets[idx].lang, uncovered);
+	if (uncovered)
+	    continue;
+	if (FcFreeTypeHasLang (pattern, fcLangCharSets[idx].lang))
+	    continue;
+	if (!FcPatternAddString (pattern, FC_LANG, fcLangCharSets[idx].lang))
+	    return FcFalse;
+    }
+    if (FcDebug() & FC_DBG_SCANV)
+	printf ("\n");
+    return FcTrue;
+}
+
+
+/*
+ * Compare two language tags of the form "lang" or "lang-country",
+ * ignoring case.
+ *
+ * Returns FcLangEqual when both tags match completely,
+ * FcLangDifferentLang when the language codes differ (also when one
+ * code is merely a prefix of the other, such as "en" and "eng"), and
+ * FcLangDifferentCountry when the language codes match but the country
+ * parts differ or only one tag has a country.  For tags whose language
+ * part is "x" a country mismatch is reported as FcLangDifferentLang.
+ */
+FcLangResult
+FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
+{
+    const FcChar8   *orig_s1 = s1;
+    FcChar8	    c1, c2;
+    FcLangResult    result;
+    int		    end1, end2;
+    /*
+     * Compare ISO 639 language codes
+     */
+    for (;;)
+    {
+	c1 = *s1++;
+	c2 = *s2++;
+	end1 = (c1 == '\0' || c1 == '-');
+	end2 = (c2 == '\0' || c2 == '-');
+	if (end1 || end2)
+	    break;
+	c1 = FcToLower (c1);
+	c2 = FcToLower (c2);
+	if (c1 != c2)
+	    return FcLangDifferentLang;	    /* mismatching lang code */
+    }
+    /*
+     * Only one code has ended, so one is a strict prefix of the other
+     * ("en" vs "eng"); those are different languages
+     */
+    if (end1 != end2)
+	return FcLangDifferentLang;
+    if (!c1 && !c2)
+	return FcLangEqual;
+    /*
+     * Make x-* mismatch as if the lang part didn't match
+     */
+    result = FcLangDifferentCountry;
+    if (orig_s1[0] == 'x' && (orig_s1[1] == '\0' || orig_s1[1] == '-'))
+	result = FcLangDifferentLang;
+    
+    /* only one of the tags carries a country part */
+    if (c1 == '\0' || c2 == '\0')
+	return result;
+    /*
+     * Compare ISO 3166 country codes
+     */
+    for (;;)
+    {
+	c1 = *s1++;
+	c2 = *s2++;
+	if (!c1 || !c2)
+	    break;
+	c1 = FcToLower (c1);
+	c2 = FcToLower (c2);
+	if (c1 != c2)
+	    break;
+    }
+    if (c1 == c2)
+	return FcLangEqual;
+    else
+	return result;
+}
+
+/*
+ * Look up the charset required for a language.
+ *
+ * An entry whose tag compares equal to lang wins.  Failing that, the
+ * first entry with the same language but a different country is used.
+ * Returns 0 when the table has no entry for the language at all.
+ */
+const FcCharSet *
+FcCharSetForLang (const FcChar8 *lang)
+{
+    int		i;
+    int		country = -1;	/* first same-language entry, -1 if none */
+    for (i = 0; i < NUM_LANG_CHAR_SET; i++)
+    {
+	switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
+	case FcLangEqual:
+	    return &fcLangCharSets[i].charset;
+	case FcLangDifferentCountry:
+	    if (country == -1)
+		country = i;
+	    break;
+	default:
+	    break;
+	}
+    }
+    if (country == -1)
+	return 0;
+    /* i is one past the end of the table here; the fallback is country */
+    return &fcLangCharSets[country].charset;
+}
--- a/fc-lang/fi.orth
+++ b/fc-lang/fi.orth
@ -0,0 +1,15 @@
+# Finnish (FI)
+0040-005a
+0060-007a
+00bb
+00c4-00c6
+00d5-00d6
+00dc
+00e4-00e6
+00f5-00f6
+00fc
+0160-0161
+017d-017e
+#2019	# single quote
+#201d	# double quote
+#203a	# angle quote
--- a/fc-lang/fo.orth
+++ b/fc-lang/fo.orth
@ -0,0 +1,25 @@
+# Faroese (FO)
+0040-005a
+0060-007a
+00c1
+00c4-00c6
+00cd
+00d0
+00d3
+00d6
+00d8
+00da
+00dc-00dd
+00e1
+00e4-00e6
+00ed
+00f0
+00f3
+00f6
+00f8
+00fa
+00fc-00fd
+#2018	# single quote
+#201a	# single quote
+#201c	# double quote
+#201e	# double quote
--- a/fc-lang/fr.orth
+++ b/fc-lang/fr.orth
@ -0,0 +1,28 @@
+# French (FR)
+0040-005a
+0060-007a
+00ab
+00bb
+00c0
+00c2
+00c6
+00c8-00cb
+00ce-00cf
+00d1
+00d4
+00d9
+00db
+00e0
+00e2
+00e6
+00e8-00eb
+00ee-00ef
+00f1
+00f4
+00f9
+00fb
+00ff
+#0152-0153 	# Oe and oe
+#0178		# Y with diaeresis
+#2019-201a	# single quotes
+#2039-203a	# angle quotes
--- a/fc-lang/fy.orth
+++ b/fc-lang/fy.orth
@ -0,0 +1,2 @@
+# Frisian (FY)
+include de.orth
--- a/fc-lang/ga.orth
+++ b/fc-lang/ga.orth
@ -0,0 +1,29 @@
+# Irish (GA)
+0040-005a
+0060-007a
+00c0-00c1
+00c7-00c9
+00cc-00cd
+00d2-00d3
+00d9-00da
+00e0-00e1
+00e7-00e9
+00ec-00ed
+00f2-00f3
+00f9-00fa
+010a-010b
+0120-0121
+017f
+027c
+1e02-1e03
+1e0a-1e0b
+1e1e-1e1f
+1e40-1e41
+1e56-1e57
+1e60-1e61
+1e6a-1e6b
+1e9b
+#1680-169c	# Ogham
+#2018-2019	# single quotes
+#201c-201d	# double quotes
+204a		# tironian sign et
--- a/fc-lang/gd.orth
+++ b/fc-lang/gd.orth
@ -0,0 +1,15 @@
+# Scots Gaelic (GD)
+0040-005a
+0060-007a
+00c0
+00c7-00c9
+00cc
+00d2-00d3
+00d9
+00e0
+00e7-00e9
+00ec
+00f2-00f3
+00f9
+#2018-2019	# single quotes
+#201c-201d	# double quotes
--- a/fc-lang/gl.orth
+++ b/fc-lang/gl.orth
@ -0,0 +1,22 @@
+# Galician (GL)
+0040-005a
+0060-007a
+00a1
+00ab
+00bb
+00bf
+00c1
+00c9
+00cd
+00d1
+00d3
+00da
+00dc
+00e1
+00e9
+00ed
+00f1
+00f3
+00fa
+00fc
+#2019-201a	# single quotes
--- a/fc-lang/he.orth
+++ b/fc-lang/he.orth
@ -0,0 +1,2 @@
+# Hebrew (HE)
+05d0-05ea
--- a/fc-lang/hr.orth
+++ b/fc-lang/hr.orth
@ -0,0 +1,21 @@
+# Croatian (HR)
+0040-005a
+0060-007a
+00c0
+00c8
+00cc
+00d2
+00d9
+00e0
+00e8
+00ec
+00f2
+00f9
+0106-0107
+010c-010d
+0110-0111
+0160-0161
+017d-017e
+01c4-01cc
+01f1-01f5
+0200-0217
--- a/fc-lang/hu.orth
+++ b/fc-lang/hu.orth
@ -0,0 +1,19 @@
+# Hungarian (HU)
+0040-005a
+0060-007a
+00c0-00c1
+00c9
+00cd
+00d3
+00d6
+00da
+00dc
+00e0-00e1
+00e9
+00ed
+00f3
+00f6
+00fa
+00fc
+0150-0151
+0170-0171
--- a/fc-lang/hy.orth
+++ b/fc-lang/hy.orth
@ -0,0 +1,5 @@
+# Armenian (HY)
+0531-0556
+055a-055f
+0561-0586
+0589-058a
--- a/fc-lang/is.orth
+++ b/fc-lang/is.orth
@ -0,0 +1,26 @@
+# Icelandic (IS)
+0040-005a
+0060-007a
+00c1
+00c4-00c6
+00c9
+00cb
+00cd
+00d0
+00d3
+00d8
+00da
+00dc-00de
+00e1
+00e4-00e6
+00e9
+00ed
+00f0
+00f3
+00f8
+00fa
+00fc-00fe
+#2018	# single quote
+#201a	# single quote
+#201c	# double quote
+#201e	# double quote
--- a/fc-lang/it.orth
+++ b/fc-lang/it.orth
@ -0,0 +1,14 @@
+# Italian (IT)
+0040-005a
+0060-007a
+00c0
+00c8-00c9
+00cc-00cd
+00cf
+00d2-00d3
+00d9-00da
+00e8-00e9
+00ec-00ed
+00ef
+00f2-00f3
+00f9-00fa
--- a/fc-lang/ja.orth
+++ b/fc-lang/ja.orth
--- a/fc-lang/ka.orth
+++ b/fc-lang/ka.orth
@ -0,0 +1,9 @@
+# Georgian (KA)
+0589	# Armenian full stop (vertsaket)
+10a0-10c5
+10d0-10f8
+10fb
+#2018	# single quote
+#201a	# single quote
+#201c	# double quote
+#201e	# double quote
--- a/fc-lang/kk.orth
+++ b/fc-lang/kk.orth
@ -0,0 +1,15 @@
+# Kazakh (KK)
+00ab
+00bb
+0401
+0406
+0410-044f
+0451
+0456
+0492-0493
+049a-049b
+04a2-04a3
+04ba-04bb
+04d8-04d9
+04e8-04e9
+#2039-203a	# angle quotes
--- a/fc-lang/kl.orth
+++ b/fc-lang/kl.orth
@ -0,0 +1,23 @@
+# Greenlandic (KL)
+0040-005a
+0060-007a
+00ab
+00bb
+00c1-00c3
+00c5-00c6
+00ca
+00cd-00ce
+00d4
+00d8
+00da-00db
+00e1-00e3
+00e5-00e6
+00ea
+00ed-00ee
+00f4
+00f8
+00fa-00fb
+0128-0129
+0138
+0168-0169
+#2039-203a	# angle quotes
--- a/fc-lang/ko.orth
+++ b/fc-lang/ko.orth
--- a/fc-lang/la.orth
+++ b/fc-lang/la.orth
@ -0,0 +1,8 @@
+# Latin (LA)
+0040-005a
+0060-007a
+0100-0101
+0112-0113
+012a-012d
+014c-014f
+016a-016d
--- a/fc-lang/lt.orth
+++ b/fc-lang/lt.orth
@ -0,0 +1,13 @@
+# Lithuanian (LT)
+0040-005a
+0060-007a
+0104-0105
+010c-010d
+0116-0119
+012e-012f
+0160-0161
+016a-016b
+0172-0173
+017d-017e
+#2019-201a	# single quotes
+#201d-201e	# double quotes
--- a/fc-lang/lv.orth
+++ b/fc-lang/lv.orth
@ -0,0 +1,16 @@
+# Latvian (LV)
+0040-005a
+0060-007a
+0100-0101
+010c-010d
+0112-0113
+0122-0123
+012a-012b
+0136-0137
+013b-013c
+0145-0146
+014c-014d
+0156-0157
+0160-0161
+016a-016b
+017d-017e
--- a/fc-lang/mk.orth
+++ b/fc-lang/mk.orth
@ -0,0 +1,15 @@
+# Macedonian (MK)
+0400
+0403
+0405
+0408-040a
+040c-040d
+040f
+0410-0418
+041a-0428
+0450
+0453
+0455
+0458-045a
+045c-045d
+045f
--- a/fc-lang/mo.orth
+++ b/fc-lang/mo.orth
@ -0,0 +1,14 @@
+# Moldavian (MO)
+0040-005a
+0060-007a
+00c2
+00ce
+00e2
+00ee
+0102-0103
+0218-021b
+0401
+0410-044f
+0451
+#2019-201a	# single quotes
+#201d-201e	# double quotes
--- a/fc-lang/mt.orth
+++ b/fc-lang/mt.orth
@ -0,0 +1,18 @@
+# Maltese (MT)
+0040-005a
+0060-007a
+00c0-00c2
+00c8-00ca
+00cc-00ce
+00d2-00d4
+00d9-00db
+00e0-00e2
+00e8-00ea
+00ec-00ee
+00f2-00f4
+00f9-00fb
+010a-010b
+0120-0121
+0126-0127
+017b-017c
+02bc
--- a/fc-lang/nl.orth
+++ b/fc-lang/nl.orth
@ -0,0 +1,15 @@
+# Dutch (NL)
+0040-005a
+0060-007a
+00c4
+00cb
+00cf
+00d6
+00dc
+00e4
+00eb
+00ef
+00f6
+00fc
+#0132-0133	# IJ and ij ligatures
+
--- a/fc-lang/no.orth
+++ b/fc-lang/no.orth
@ -0,0 +1,18 @@
+# Norwegian (Bokmål) (NO)
+0040-005a
+0060-007a
+00ab
+00bb
+00c0-00c2
+00c4-00cb
+00d2-00d4
+00d6
+00d8
+00dc
+00e0-00e2
+00e4-00eb
+00f2-00f4
+00f6
+00f8
+00fc
+#2039-203a	# angle quotes
--- a/fc-lang/oc.orth
+++ b/fc-lang/oc.orth
@ -0,0 +1,15 @@
+# Occitan (OC)
+0040-005a
+0060-007a
+00c0-00c1
+00c7-00c9
+00cb
+00cd
+00cf
+00d2-00d3
+00e0-00e1
+00e7-00e9
+00eb
+00ed
+00ef
+00f2-00f3
--- a/fc-lang/pl.orth
+++ b/fc-lang/pl.orth
@ -0,0 +1,10 @@
+# Polish (PL)
+0040-005a
+0060-007a
+00d3
+00f3
+0104-0107
+0118-0119
+0141-0144
+015a-015b
+0179-017c
--- a/fc-lang/pt.orth
+++ b/fc-lang/pt.orth
@ -0,0 +1,19 @@
+# Portuguese (PT)
+0040-005a
+0060-007a
+00bb
+00c0-00c3
+00c7
+00c9-00ca
+00cd
+00d3-00d5
+00da
+00dc
+00e0-00e3
+00e7
+00e9-00ea
+00ed
+00f3-00f5
+00fa
+00fc
+#203a	# angle quote
--- a/fc-lang/rm.orth
+++ b/fc-lang/rm.orth
@ -0,0 +1,15 @@
+# Rhaeto-Romance (Romansch) (RM)
+0040-005a
+0060-007a
+00c0
+00c8-00c9
+00cc
+00ce
+00d2
+00d9
+00e0
+00e8-00e9
+00ec
+00ee
+00f2
+00f9
--- a/fc-lang/ro.orth
+++ b/fc-lang/ro.orth
@ -0,0 +1,11 @@
+# Romanian (RO)
+0040-005a
+0060-007a
+00c2
+00ce
+00e2
+00ee
+0102-0103
+0218-021b
+#2019-201a	# single quotes
+#201d-201e	# double quotes
--- a/fc-lang/ru.orth
+++ b/fc-lang/ru.orth
@ -0,0 +1,11 @@
+# Russian (RU)
+00ab
+00bb
+0401
+0406
+0410-044f
+0451
+0456
+0462-0463
+0472-0475
+#2039-203a	# angle quotes
--- a/fc-lang/sh.orth
+++ b/fc-lang/sh.orth
@ -0,0 +1,2 @@
+# Serbo-Croatian (SH)
+include sr.orth
--- a/fc-lang/sk.orth
+++ b/fc-lang/sk.orth
@ -0,0 +1,29 @@
+# Slovak (SK)
+0040-005a
+0060-007a
+00c1
+00c4
+00c9
+00cd
+00d3-00d4
+00d6
+00da
+00dc-00dd
+00e1
+00e4
+00e9
+00ed
+00f3-00f4
+00f6
+00fa
+00fc-00fd
+010c-010f
+0139-013a
+013d-013e
+0147-0148
+0150-0151
+0154-0155
+0160-0161
+0164-0165
+0170-0171
+017d-017e
--- a/fc-lang/sl.orth
+++ b/fc-lang/sl.orth
@ -0,0 +1,21 @@
+# Slovenian (SL)
+0040-005a
+0060-007a
+0106-0107
+010c-010d
+0110-0111
+0160-0161
+017d-017e
+01c5-01c6
+01c8-01c9
+01cb-01cc
+01f2-01f5
+#1e30-1e31	# K, k with acute
+0402-0403
+0405
+0409-040c
+040f
+0452-0453
+0455
+0459-045c
+045f
--- a/fc-lang/sq.orth
+++ b/fc-lang/sq.orth
@ -0,0 +1,7 @@
+# Albanian (SQ)
+0040-005a
+0060-007a
+00c7
+00cb
+00e7
+00eb
--- a/fc-lang/sr.orth
+++ b/fc-lang/sr.orth
@ -0,0 +1,11 @@
+# Serbian (SR)
+0402
+0408-040b
+040f
+0410-0418
+041a-0428
+0430-0438
+043a-0448
+0452
+0458-045b
+045f
--- a/fc-lang/sv.orth
+++ b/fc-lang/sv.orth
@ -0,0 +1,21 @@
+# Swedish (SV)
+0040-005a
+0060-007a
+00bb
+00c0-00c3
+00c4-00d4
+00d6-00d8
+00d9-00dd
+00e0-00e3
+00e4-00f4
+00f6
+00f8
+00f9-00fd
+#0106-0107	# C, c with acute
+#010c-010d	# C, c with caron
+#0141-0144	# L, l with stroke  N, n with acute
+#0158-015b	# R, r with caron   S, s with acute
+#0160-0161	# S, s with caron
+#2019	# single quote
+#201d	# double quote
+#203a	# angle quote
--- a/fc-lang/th.orth
+++ b/fc-lang/th.orth
@ -0,0 +1,3 @@
+# Thai (TH)
+0e01-0e3a
+0e3f-0e5b
--- a/fc-lang/tr.orth
+++ b/fc-lang/tr.orth
@ -0,0 +1,12 @@
+# Turkish (TR)
+0040-005a
+0060-007a
+00c2
+00c7
+00d6
+00db-00dc
+00e2
+00e7
+00f6
+00fb-00fc
+011e-011f
--- a/fc-lang/uk.orth
+++ b/fc-lang/uk.orth
@ -0,0 +1,12 @@
+# Ukrainian (UK)
+00ab
+00bb
+0401
+0404
+0406-0407
+0410-044f
+0451
+0454
+0456-0457
+0490-0491
+#2039-203a	# angle quotes
--- a/fc-lang/vo.orth
+++ b/fc-lang/vo.orth
@ -0,0 +1,13 @@
+# Volapük (VO)
+0041-0050
+0052-0056
+0058-005a
+0061-0070
+0072-0076
+0078-007a
+00c4
+00d6
+00dc
+00e4
+00f6
+00fc
--- a/fc-lang/yi.orth
+++ b/fc-lang/yi.orth
@ -0,0 +1,2 @@
+# Yiddish (YI)
+include he.orth
--- a/fc-lang/zh_cn.orth
+++ b/fc-lang/zh_cn.orth
--- a/fc-lang/zh_tw.orth
+++ b/fc-lang/zh_tw.orth