/*
* fontconfig/fc-lang/fc-lang.c
*
* Copyright © 2002 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of the author(s) not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. The authors make no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include "fccharset.c"
#include "fcstr.c"
#include "fcserialize.c"
/*
 * fc-lang
 *
 * Read a set of language orthographies and build C declarations for
 * charsets which can then be used to identify which languages are
 * supported by a given font. Note that this uses some utilities
 * from the fontconfig library, so the necessary files are simply
 * included in this compilation. A couple of extra utility
 * functions are also needed in slightly modified form.
 */
/*
 * No-op version of FcCacheObjectReference: the argument is ignored and
 * nothing is done.
 * NOTE(review): presumably present only to satisfy references from the
 * library sources included above -- confirm.
 */
FcPrivate void
FcCacheObjectReference (void *object FC_UNUSED)
{
}
/*
 * No-op version of FcCacheObjectDereference: the argument is ignored and
 * nothing is done.
 * NOTE(review): presumably present only to satisfy references from the
 * library sources included above -- confirm.
 */
FcPrivate void
FcCacheObjectDereference (void *object FC_UNUSED)
{
}
/*
 * Stub version of FcLangNormalize: ignores lang and always returns NULL.
 * NOTE(review): presumably present only to satisfy references from the
 * library sources included above -- confirm.
 */
FcPrivate FcChar8 *
FcLangNormalize (const FcChar8 *lang FC_UNUSED)
{
return NULL;
}
/*
 * Definition of the FcDebugVal global; nothing in this file assigns it.
 * NOTE(review): presumably read by the included library sources -- confirm.
 */
int FcDebugVal;
/*
 * Return the value of the HOME environment variable exactly as getenv()
 * reports it (NULL when the variable is not set).
 */
FcChar8 *
FcConfigHome (void)
{
    char *home = getenv ("HOME");

    return (FcChar8 *) home;
}
/*
 * Print a diagnostic on stderr and terminate the program with status 1.
 *
 * file   - name shown as the message prefix
 * lineno - line number to report; 0 means "no line number"
 * msg    - text of the message
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno == 0)
        fprintf (stderr, "%s: %s\n", file, msg);
    else
        fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);

    exit (1);
}
/*
 * Read the next meaningful line from f into buf.
 *
 * Everything from the first '#' onward is discarded, leading and trailing
 * whitespace is stripped, and lines that end up empty are skipped.
 * *lineno is incremented once for every chunk read by fgets, skipped
 * lines included.
 *
 * buf must provide room for at least 1024 bytes.
 *
 * Returns a pointer into buf at the first non-blank character, or a null
 * pointer once fgets reports end of file or an error.
 */
static char *
get_line (FILE *f, char *buf, int *lineno)
{
    for (;;)
    {
        char   *line = buf;
        char   *hash;
        size_t  end;

        if (!fgets (line, 1024, f))
            return 0;
        ++(*lineno);

        /* strip the comment, if any */
        hash = strchr (line, '#');
        if (hash)
            *hash = '\0';

        /*
         * The <ctype.h> classifiers are only defined for values
         * representable as unsigned char (or EOF), so convert before
         * calling them; plain char may be negative.
         */
        while (isspace ((unsigned char) line[0]))
            line++;
        end = strlen (line);
        while (end > 0 && isspace ((unsigned char) line[end - 1]))
            line[--end] = '\0';

        /* after trimming, a blank line is simply the empty string */
        if (line[0] != '\0')
            return line;
    }
}
/* Fallback directory (set from the -d option in main); 0 when not given */
static char *dir = 0;

/*
 * Open file for reading.
 *
 * The name is first tried exactly as given.  If that fails and a fallback
 * directory has been set, "<dir>/<file>" is tried as well.
 *
 * Returns the open stream, or NULL when neither attempt succeeds or when
 * the combined path does not fit in the local path buffer.
 */
static FILE *
scanopen (char *file)
{
    FILE *f = fopen (file, "r");

    if (!f && dir)
    {
        char path[1024];
        int  len = snprintf (path, sizeof (path), "%s/%s", dir, file);

        /* an over-long name is treated as "not found" instead of being
           written past the end of path[] */
        if (len >= 0 && (size_t) len < sizeof (path))
            f = fopen (path, "r");
    }
    return f;
}
/*
 * Build a single charset from a source file.
 *
 * The file format is quite simple: each line holds either a single hex
 * value or a pair of hex values separated with a dash.  A leading '-'
 * removes the value or range from the set instead of adding it, and a
 * line of the form "include filename" merges in the charset built from
 * another file (opened through scanopen).
 *
 * Comments begin with '#'.
 *
 * f       - open stream to read
 * file    - name of that stream, used in diagnostics
 * freezer - passed through to FcCharSetFreeze
 *
 * Returns the set produced by FcCharSetFreeze.  Every error terminates
 * the program through fatal().
 */
static FcCharSet *
scan (FILE *f, char *file, FcCharSetFreezer *freezer)
{
    FcCharSet *c = 0;
    FcCharSet *n;
    FcBool     del;
    int        start, end, ucs4;
    char       buf[1024];
    char      *line;
    int        lineno = 0;

    while ((line = get_line (f, buf, &lineno)))
    {
        if (!strncmp (line, "include", 7))
        {
            FILE *included_f;
            char *included_file;

            included_file = strchr (line, ' ');
            if (!included_file)
                fatal (file, lineno,
                       "invalid syntax, expected: include filename");
            /* <ctype.h> classifiers need an unsigned char value */
            while (isspace ((unsigned char) *included_file))
                included_file++;
            included_f = scanopen (included_file);
            if (!included_f)
                fatal (included_file, 0, "can't open");
            n = scan (included_f, included_file, freezer);
            fclose (included_f);
            if (!c)
                c = FcCharSetCreate ();
            if (!c || !FcCharSetMerge (c, n, NULL))
                fatal (file, lineno, "out of memory");
            FcCharSetDestroy (n);
            continue;
        }

        del = FcFalse;
        if (line[0] == '-')
        {
            del = FcTrue;
            line++;
        }
        if (strchr (line, '-'))
        {
            if (sscanf (line, "%x-%x", &start, &end) != 2)
                fatal (file, lineno, "parse error");
        }
        else
        {
            if (sscanf (line, "%x", &start) != 1)
                fatal (file, lineno, "parse error");
            end = start;
        }
        if (!c)
            c = FcCharSetCreate ();
        if (!c)
            fatal (file, lineno, "out of memory");
        for (ucs4 = start; ucs4 <= end; ucs4++)
        {
            if (!((del ? FcCharSetDelChar : FcCharSetAddChar) (c, ucs4)))
                fatal (file, lineno, "out of memory");
        }
    }

    /* a file without any entries still produces an (empty) set instead
       of handing a null pointer to FcCharSetFreeze */
    if (!c)
        c = FcCharSetCreate ();
    if (!c)
        fatal (file, lineno, "out of memory");

    n = (FcCharSet *) FcCharSetFreeze (freezer, c);
    FcCharSetDestroy (c);
    /* main dereferences the result unconditionally, so never return NULL */
    if (!n)
        fatal (file, 0, "out of memory");
    return n;
}
/*
 * Convert a file name into a name suitable for C declarations.
 *
 * Returns a newly malloc'ed copy of file cut off at its first '.' (the
 * whole string when it contains none).  The caller owns the result.
 * Prints a diagnostic and exits with status 1 if memory cannot be
 * allocated.
 */
static char *
get_name (char *file)
{
    char   *dot = strchr (file, '.');
    size_t  len = dot ? (size_t) (dot - file) : strlen (file);
    char   *name = malloc (len + 1);

    if (!name)
    {
        fprintf (stderr, "%s: %s\n", file, "out of memory");
        exit (1);
    }
    /* the length is known, so copy exactly that many bytes and terminate */
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}
/*
 * Convert a C name into a language name.
 *
 * Upper-case letters are folded to lower case, '_' becomes '-', and
 * spaces are dropped.  Returns a newly malloc'ed string owned by the
 * caller.  Prints a diagnostic and exits with status 1 if memory cannot
 * be allocated.
 */
static char *
get_lang (char *name)
{
    char *lang = malloc (strlen (name) + 1);
    char *out = lang;
    char  c;

    if (!lang)
    {
        fprintf (stderr, "%s: %s\n", name, "out of memory");
        exit (1);
    }
    for (; (c = *name) != '\0'; name++)
    {
        /* <ctype.h> functions need an unsigned char value */
        if (isupper ((int) (unsigned char) c))
            c = tolower ((int) (unsigned char) c);
        if (c == '_')
            c = '-';
        if (c == ' ')
            continue;
        *out++ = c;
    }
    *out = '\0';
    return lang;
}
/*
 * One input file named on the command line.  main sorts an array of these
 * by file name; id keeps the position the file had before sorting.
 */
typedef struct _Entry {
int id; /* index assigned in command-line order */
char *file; /* file name taken from argv; 0 terminates the array */
} Entry;
/*
 * qsort comparison callback for Entry records: the result is whatever
 * FcStrCmpIgnoreCase returns for the two file names.
 */
static int compare (const void *a, const void *b)
{
    const Entry   *lhs = a;
    const Entry   *rhs = b;
    const FcChar8 *lhs_name = (const FcChar8 *) lhs->file;
    const FcChar8 *rhs_name = (const FcChar8 *) rhs->file;

    return FcStrCmpIgnoreCase (lhs_name, rhs_name);
}
/* Maximum number of language files main accepts */
#define MAX_LANG 1024
/* Number of 32-bit words needed to hold one bit per language */
#define MAX_LANG_SET_MAP ((MAX_LANG + 31) / 32)
/*
 * Set, in the word array map, the bit whose number is entries[i].id.
 * Note: the expansion refers to an array called `entries`, which therefore
 * has to be in scope wherever the macro is used.
 */
#define BitSet(map, i) ((map)[(entries[i].id)>>5] |= ((FcChar32) 1 << ((entries[i].id) & 0x1f)))
/*
 * Generate the language charset tables.
 *
 * Command line: an optional "-d directory" (fallback directory used by
 * scanopen) followed by the orthography files to process.
 *
 * Standard input is copied to standard output up to, but not including,
 * the first line beginning with "@@@"; the generated declarations are
 * written next, followed by the remainder of standard input.
 *
 * The exit status is the value of ferror (stdout); errors met on the way
 * terminate the program through fatal().
 */
int
main (int argc FC_UNUSED, char **argv)
{
    static Entry      entries[MAX_LANG + 1];
    /* one extra slot: the list is terminated by a null pointer */
    static FcCharSet *sets[MAX_LANG + 1];
    static int        duplicate[MAX_LANG];
    static int        country[MAX_LANG];
    static char      *names[MAX_LANG];
    static char      *langs[MAX_LANG];
    static int        off[MAX_LANG];
    FILE             *f;
    int               ncountry = 0;
    int               i = 0;
    int               nsets = 0;
    int               argi;
    FcCharLeaf      **leaves;
    int               total_leaves = 0;
    int               l, sl, tl, tn;
    static char       line[1024];
    static FcChar32   map[MAX_LANG_SET_MAP];
    int               num_lang_set_map;
    int               setRangeStart[26];
    int               setRangeEnd[26];
    FcChar8           setRangeChar;
    FcCharSetFreezer *freezer;

    freezer = FcCharSetFreezerCreate ();
    if (!freezer)
        fatal (argv[0], 0, "out of memory");

    /*
     * Collect the options and file names
     */
    argi = 1;
    while (argv[argi])
    {
        if (!strcmp (argv[argi], "-d"))
        {
            /* without this check a trailing -d would step past the
               terminating null pointer of argv */
            if (!argv[argi + 1])
                fatal (argv[0], 0, "-d requires a directory argument");
            dir = argv[argi + 1];
            argi += 2;
            continue;
        }
        if (i == MAX_LANG)
            fatal (argv[0], 0, "Too many languages");
        entries[i].id = i;
        entries[i].file = argv[argi++];
        i++;
    }
    entries[i].file = 0;
    qsort (entries, i, sizeof (Entry), compare);

    /*
     * Read every file, in sorted order
     */
    i = 0;
    while (entries[i].file)
    {
        f = scanopen (entries[i].file);
        if (!f)
            fatal (entries[i].file, 0, strerror (errno));
        sets[i] = scan (f, entries[i].file, freezer);
        names[i] = get_name (entries[i].file);
        langs[i] = get_lang(names[i]);
        if (strchr (langs[i], '-'))
            country[ncountry++] = i;

        total_leaves += sets[i]->num;
        i++;
        fclose (f);
    }
    nsets = i;
    sets[i] = 0;

    /* request at least one element so that a null result always means
       allocation failure, even when there are no leaves at all */
    leaves = malloc ((total_leaves ? total_leaves : 1) * sizeof (FcCharLeaf *));
    if (!leaves)
        fatal (argv[0], 0, "out of memory");
    tl = 0;
    /*
     * Find unique leaves
     */
    for (i = 0; sets[i]; i++)
    {
        for (sl = 0; sl < sets[i]->num; sl++)
        {
            for (l = 0; l < tl; l++)
                if (leaves[l] == FcCharSetLeaf(sets[i], sl))
                    break;
            if (l == tl)
                leaves[tl++] = FcCharSetLeaf(sets[i], sl);
        }
    }

    /*
     * Scan the input until the marker is found
     */
    while (fgets (line, sizeof (line), stdin))
    {
        if (!strncmp (line, "@@@", 3))
            break;
        fputs (line, stdout);
    }

    printf ("/* total size: %d unique leaves: %d */\n\n",
            total_leaves, tl);

    /*
     * Find duplicate charsets
     */
    duplicate[0] = -1;
    for (i = 1; sets[i]; i++)
    {
        int j;

        duplicate[i] = -1;
        for (j = 0; j < i; j++)
            if (sets[j] == sets[i])
            {
                duplicate[i] = j;
                break;
            }
    }

    /*
     * Assign each distinct set its starting offset in the
     * leaf_offsets/numbers arrays
     */
    tn = 0;
    for (i = 0; sets[i]; i++) {
        if (duplicate[i] >= 0)
            continue;
        off[i] = tn;
        tn += sets[i]->num;
    }

    printf ("#define LEAF0 (%d * sizeof (FcLangCharSet))\n", nsets);
    printf ("#define OFF0 (LEAF0 + %d * sizeof (FcCharLeaf))\n", tl);
    printf ("#define NUM0 (OFF0 + %d * sizeof (uintptr_t))\n", tn);
    printf ("#define SET(n) (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))\n");
    printf ("#define OFF(s,o) (OFF0 + o * sizeof (uintptr_t) - SET(s))\n");
    printf ("#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))\n");
    printf ("#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n");
    printf ("#define fcLangCharSets (fcLangData.langCharSets)\n");
    printf ("#define fcLangCharSetIndices (fcLangData.langIndices)\n");
    printf ("#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)\n");
    printf ("\n");

    printf ("static const struct {\n"
            " FcLangCharSet langCharSets[%d];\n"
            " FcCharLeaf leaves[%d];\n"
            " uintptr_t leaf_offsets[%d];\n"
            " FcChar16 numbers[%d];\n"
            " FcChar%s langIndices[%d];\n"
            " FcChar%s langIndicesInv[%d];\n"
            "} fcLangData = {\n",
            nsets, tl, tn, tn,
            nsets < 256 ? "8 " : "16", nsets, nsets < 256 ? "8 " : "16", nsets);

    /*
     * Dump sets
     */
    printf ("{\n");
    for (i = 0; sets[i]; i++)
    {
        int j = duplicate[i];

        /* a duplicate shares the data of the first identical set */
        if (j < 0)
            j = i;

        printf (" { \"%s\", "
                " { FC_REF_CONSTANT, %d, OFF(%d,%d), NUM(%d,%d) } }, /* %d */\n",
                langs[i],
                sets[j]->num, i, off[j], i, off[j], i);
    }
    printf ("},\n");

    /*
     * Dump leaves
     */
    printf ("{\n");
    for (l = 0; l < tl; l++)
    {
        printf (" { { /* %d */", l);
        for (i = 0; i < 256/32; i++)
        {
            if (i % 4 == 0)
                printf ("\n ");
            printf (" 0x%08x,", leaves[l]->map[i]);
        }
        printf ("\n } },\n");
    }
    printf ("},\n");

    /*
     * Dump leaf offsets
     */
    printf ("{\n");
    for (i = 0; sets[i]; i++)
    {
        int n;

        if (duplicate[i] >= 0)
            continue;
        printf (" /* %s */\n", names[i]);
        for (n = 0; n < sets[i]->num; n++)
        {
            if (n % 4 == 0)
                printf (" ");
            for (l = 0; l < tl; l++)
                if (leaves[l] == FcCharSetLeaf(sets[i], n))
                    break;
            if (l == tl)
                fatal (names[i], 0, "can't find leaf");
            printf (" LEAF(%3d,%3d),", off[i], l);
            if (n % 4 == 3)
                printf ("\n");
        }
        if (n % 4 != 0)
            printf ("\n");
    }
    printf ("},\n");

    /*
     * Dump numbers
     */
    printf ("{\n");
    for (i = 0; sets[i]; i++)
    {
        int n;

        if (duplicate[i] >= 0)
            continue;
        printf (" /* %s */\n", names[i]);
        for (n = 0; n < sets[i]->num; n++)
        {
            if (n % 8 == 0)
                printf (" ");
            printf (" 0x%04x,", FcCharSetNumbers (sets[i])[n]);
            if (n % 8 == 7)
                printf ("\n");
        }
        if (n % 8 != 0)
            printf ("\n");
    }
    printf ("},\n");

    /* langIndices */
    printf ("{\n");
    for (i = 0; sets[i]; i++)
    {
        printf (" %d, /* %s */\n", entries[i].id, names[i]);
    }
    printf ("},\n");

    /* langIndicesInv */
    printf ("{\n");
    {
        static int entries_inv[MAX_LANG];

        for (i = 0; sets[i]; i++)
            entries_inv[entries[i].id] = i;
        for (i = 0; sets[i]; i++)
            printf (" %d, /* %s */\n", entries_inv[i], names[entries_inv[i]]);
    }
    printf ("}\n");

    printf ("};\n\n");

    printf ("#define NUM_LANG_CHAR_SET %d\n", i);
    num_lang_set_map = (i + 31) / 32;
    printf ("#define NUM_LANG_SET_MAP %d\n", num_lang_set_map);

    /*
     * Dump indices with country codes
     */
    if (ncountry)
    {
        int c;
        int ncountry_ent = 0;

        printf ("\n");
        printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n");
        for (c = 0; c < ncountry; c++)
        {
            i = country[c];
            if (i >= 0)
            {
                /* length of the part of the name before the '-' */
                int lang = (int) (strchr (langs[i], '-') - langs[i]);
                int d, k;

                for (k = 0; k < num_lang_set_map; k++)
                    map[k] = 0;

                BitSet (map, i);
                for (d = c + 1; d < ncountry; d++)
                {
                    int j = country[d];

                    /* same prefix up to and including the '-' */
                    if (j >= 0 && !strncmp (langs[j], langs[i], lang + 1))
                    {
                        BitSet(map, j);
                        /* mark as already emitted */
                        country[d] = -1;
                    }
                }
                printf (" {");
                for (k = 0; k < num_lang_set_map; k++)
                    printf (" 0x%08x,", map[k]);
                printf (" }, /* %*.*s */\n",
                        lang, lang, langs[i]);
                ++ncountry_ent;
            }
        }
        printf ("};\n\n");
        printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent);
    }

    /*
     * Find ranges for each letter for faster searching
     */
    setRangeChar = 'a';
    memset(setRangeStart, '\0', sizeof (setRangeStart));
    memset(setRangeEnd, '\0', sizeof (setRangeEnd));
    for (i = 0; sets[i]; i++)
    {
        char c = names[i][0];

        while (setRangeChar <= c && c <= 'z')
            setRangeStart[setRangeChar++ - 'a'] = i;
    }
    while (setRangeChar <= 'z') /* no language code starts with these letters */
        setRangeStart[setRangeChar++ - 'a'] = i;

    for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++)
        setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1;
    setRangeEnd[setRangeChar - 'a'] = i - 1;

    /*
     * Dump sets start/finish for the fastpath
     */
    printf ("\n");
    printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n");
    printf ("\n");
    for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
    {
        printf (" { %d, %d }, /* %c */\n",
                setRangeStart[setRangeChar - 'a'],
                setRangeEnd[setRangeChar - 'a'], setRangeChar);
    }
    printf ("};\n\n");

    /*
     * Copy the rest of the input
     */
    while (fgets (line, sizeof (line), stdin))
        fputs (line, stdout);

    fflush (stdout);
    exit (ferror (stdout));
}