[fc-lang] Make LangSet representation in the cache files stable

Fontconfig assigns an index number to each language it knows about.
The index is used to index a bit in FcLangSet language map.  The bit
map is stored in the cache.

Previously, fc-lang simply sorted the list of languages and assigned
each one an index starting from zero.  The net effect was that whenever
new orth files were added, all the FcLangSet info in the cache files
would become invalid.  This was causing weird bugs like this one:

  https://bugzilla.redhat.com/show_bug.cgi?id=490888

With this commit, the index assigned to each language becomes fixed.
The index is based on the order in which the orth files are passed to
fc-lang.  As a result, all orth files are now explicitly listed in
Makefile.am, and new additions should be made at the end of the list.
The list reflects the sorted list of orthographies from the 2.6.0
release, followed by the additions made since.

This fixes the stability problem.  Needless to say, recreating caches
is necessary before any new orthography is recognized in existing fonts,
but at least the existing caches are still valid and don't cause bugs
like the above.
This commit is contained in:
Behdad Esfahbod 2009-03-18 19:28:52 -04:00
parent 4d13536db4
commit ffd6668b46
4 changed files with 282 additions and 26 deletions

View File

@ -487,13 +487,6 @@ CONFDIR=${confdir}
AC_DEFINE_UNQUOTED(CONFDIR, "$CONFDIR",[Font configuration directory])
AC_SUBST(CONFDIR)
#
# Find out what language orthographies are included
#
ORTH_FILES=`cd ${srcdir}/fc-lang && echo *.orth`
AC_SUBST(ORTH_FILES)
#
# Let people not build/install docs if they don't have docbook
#

View File

@ -36,8 +36,6 @@ noinst_PROGRAMS=fc-lang
noinst_MANS=fc-lang.man
ORTH=@ORTH_FILES@
EXTRA_DIST=$(TMPL) $(ORTH)
$(TARG):$(ORTH) fc-lang${EXEEXT} $(STMPL)
@ -53,3 +51,249 @@ $(ALIAS_FILES):
touch $(ALIAS_FILES)
CLEANFILES = $(TARG) $(ALIAS_FILES)
# NOTE:
#
# The order of the orth files here is extremely important (part of the cache
# format) and should not be modified. New orth files should be added at the
# end. No files should be removed either.
#
ORTH = \
aa.orth \
ab.orth \
af.orth \
am.orth \
ar.orth \
as.orth \
ast.orth \
av.orth \
ay.orth \
az_az.orth \
az_ir.orth \
ba.orth \
bm.orth \
be.orth \
bg.orth \
bh.orth \
bho.orth \
bi.orth \
bin.orth \
bn.orth \
bo.orth \
br.orth \
bs.orth \
bua.orth \
ca.orth \
ce.orth \
ch.orth \
chm.orth \
chr.orth \
co.orth \
cs.orth \
cu.orth \
cv.orth \
cy.orth \
da.orth \
de.orth \
dz.orth \
el.orth \
en.orth \
eo.orth \
es.orth \
et.orth \
eu.orth \
fa.orth \
fi.orth \
fj.orth \
fo.orth \
fr.orth \
ff.orth \
fur.orth \
fy.orth \
ga.orth \
gd.orth \
gez.orth \
gl.orth \
gn.orth \
gu.orth \
gv.orth \
ha.orth \
haw.orth \
he.orth \
hi.orth \
ho.orth \
hr.orth \
hu.orth \
hy.orth \
ia.orth \
ig.orth \
id.orth \
ie.orth \
ik.orth \
io.orth \
is.orth \
it.orth \
iu.orth \
ja.orth \
ka.orth \
kaa.orth \
ki.orth \
kk.orth \
kl.orth \
km.orth \
kn.orth \
ko.orth \
kok.orth \
ks.orth \
ku_am.orth \
ku_ir.orth \
kum.orth \
kv.orth \
kw.orth \
ky.orth \
la.orth \
lb.orth \
lez.orth \
ln.orth \
lo.orth \
lt.orth \
lv.orth \
mg.orth \
mh.orth \
mi.orth \
mk.orth \
ml.orth \
mn_cn.orth \
mo.orth \
mr.orth \
mt.orth \
my.orth \
nb.orth \
nds.orth \
ne.orth \
nl.orth \
nn.orth \
no.orth \
nr.orth \
nso.orth \
ny.orth \
oc.orth \
om.orth \
or.orth \
os.orth \
pa_in.orth \
pl.orth \
ps_af.orth \
ps_pk.orth \
pt.orth \
rm.orth \
ro.orth \
ru.orth \
sa.orth \
sah.orth \
sco.orth \
se.orth \
sel.orth \
sh.orth \
shs.orth \
si.orth \
sk.orth \
sl.orth \
sm.orth \
sma.orth \
smj.orth \
smn.orth \
sms.orth \
so.orth \
sq.orth \
sr.orth \
ss.orth \
st.orth \
sv.orth \
sw.orth \
syr.orth \
ta.orth \
te.orth \
tg.orth \
th.orth \
ti_er.orth \
ti_et.orth \
tig.orth \
tk.orth \
tl.orth \
tn.orth \
to.orth \
tr.orth \
ts.orth \
tt.orth \
tw.orth \
tyv.orth \
ug.orth \
uk.orth \
ur.orth \
uz.orth \
ve.orth \
vi.orth \
vo.orth \
vot.orth \
wa.orth \
wen.orth \
wo.orth \
xh.orth \
yap.orth \
yi.orth \
yo.orth \
zh_cn.orth \
zh_hk.orth \
zh_mo.orth \
zh_sg.orth \
zh_tw.orth \
zu.orth \
ak.orth \
an.orth \
ber_dz.orth \
ber_ma.orth \
byn.orth \
crh.orth \
csb.orth \
dv.orth \
ee.orth \
fat.orth \
fil.orth \
hne.orth \
hsb.orth \
ht.orth \
hz.orth \
ii.orth \
jv.orth \
kab.orth \
kj.orth \
kr.orth \
ku_iq.orth \
ku_tr.orth \
kwm.orth \
lg.orth \
li.orth \
mai.orth \
mn_mn.orth \
ms.orth \
na.orth \
ng.orth \
nv.orth \
ota.orth \
pa_pk.orth \
pap_an.orth \
pap_aw.orth \
qu.orth \
rn.orth \
rw.orth \
sc.orth \
sd.orth \
sg.orth \
sid.orth \
sn.orth \
su.orth \
ty.orth \
wal.orth \
za.orth
# ^-------------- Add new orth files here

View File

@ -129,7 +129,7 @@ static const FcCharSet *
scan (FILE *f, char *file, FcCharSetFreezer *freezer)
{
FcCharSet *c = 0;
const FcCharSet *n;
FcCharSet *n;
int start, end, ucs4;
char line[1024];
int lineno = 0;
@ -224,22 +224,27 @@ get_lang (char *name)
return lang;
}
/*
 * One orthography file named on the command line.
 *
 * main() fills these in argument order, then sorts the array by file
 * name.  The id keeps the original argument position so that it
 * survives the sort; it is the value printed into the generated
 * langIndices table and used by BitSet/BitGet to pick the bit.
 */
typedef struct _Entry {
int id;		/* position of the file in the argument list, assigned before sorting */
char *file;	/* orth file name as given on the command line; 0 marks the end of the array */
} Entry;
static int compare (const void *a, const void *b)
{
const FcChar8 *const *as = a, *const *bs = b;
return FcStrCmpIgnoreCase (*as, *bs);
const Entry const *as = a, *bs = b;
return FcStrCmpIgnoreCase (as->file, bs->file);
}
#define MAX_LANG 1024
#define MAX_LANG_SET_MAP ((MAX_LANG + 31) / 32)
#define BitSet(map, id) ((map)[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define BitGet(map, id) ((map)[(id)>>5] >> ((id) & 0x1f)) & 1)
/*
 * Set / test the bit belonging to language slot i in a language bit map.
 *
 * i is an index into the (sorted) entries[] array; the bit that is
 * actually touched is entries[i].id, i.e. the position the orth file had
 * on the command line.  Each map word holds 32 bits: id >> 5 selects the
 * word and id & 0x1f selects the bit inside it.
 *
 * BitGet evaluates to 0 or 1.  The whole expansion is parenthesised so it
 * can be used safely inside larger expressions (the previous definition
 * had an unbalanced closing parenthesis and could not be used at all).
 */
#define BitSet(map, i) ((map)[(entries[i].id)>>5] |= ((FcChar32) 1 << ((entries[i].id) & 0x1f)))
#define BitGet(map, i) (((map)[(entries[i].id)>>5] >> ((entries[i].id) & 0x1f)) & 1)
int
main (int argc, char **argv)
{
static char *files[MAX_LANG];
static Entry entries[MAX_LANG];
static const FcCharSet *sets[MAX_LANG];
static int duplicate[MAX_LANG];
static int country[MAX_LANG];
@ -276,18 +281,20 @@ main (int argc, char **argv)
}
if (i == MAX_LANG)
fatal (argv[0], 0, "Too many languages");
files[i++] = argv[argi++];
entries[i].id = i;
entries[i].file = argv[argi++];
i++;
}
files[i] = 0;
qsort (files, i, sizeof (char *), compare);
entries[i].file = 0;
qsort (entries, i, sizeof (Entry), compare);
i = 0;
while (files[i])
while (entries[i].file)
{
f = scanopen (files[i]);
f = scanopen (entries[i].file);
if (!f)
fatal (files[i], 0, strerror (errno));
sets[i] = scan (f, files[i], freezer);
names[i] = get_name (files[i]);
fatal (entries[i].file, 0, strerror (errno));
sets[i] = scan (f, entries[i].file, freezer);
names[i] = get_name (entries[i].file);
langs[i] = get_lang(names[i]);
if (strchr (langs[i], '-'))
country[ncountry++] = i;
@ -362,6 +369,7 @@ main (int argc, char **argv)
printf ("#define NUM(s,n) (NUM0 + n * sizeof (FcChar16) - SET(s))\n");
printf ("#define LEAF(o,l) (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n");
printf ("#define fcLangCharSets (fcLangData.langCharSets)\n");
printf ("#define fcLangCharSetIndices (fcLangData.langIndices)\n");
printf ("\n");
printf ("static const struct {\n"
@ -369,8 +377,10 @@ main (int argc, char **argv)
" FcCharLeaf leaves[%d];\n"
" intptr_t leaf_offsets[%d];\n"
" FcChar16 numbers[%d];\n"
" FcChar%s langIndices[%d];\n"
"} fcLangData = {\n",
nsets, tl, tn, tn);
nsets, tl, tn, tn,
nsets < 256 ? "8 " : "16", nsets);
/*
* Dump sets
@ -457,6 +467,13 @@ main (int argc, char **argv)
if (n % 8 != 0)
printf ("\n");
}
printf ("},\n");
printf ("{\n");
for (i = 0; sets[i]; i++)
{
printf (" %d, /* %s */\n", entries[i].id, names[i]);
}
printf ("}\n");
printf ("};\n\n");
@ -527,7 +544,9 @@ main (int argc, char **argv)
/*
* Dump sets start/finish for the fastpath
*/
printf ("\n");
printf ("static const FcLangCharSetRange fcLangCharSetRanges[] = {\n");
printf ("\n");
for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
{
printf (" { %d, %d }, /* %c */\n",

View File

@ -42,8 +42,8 @@ struct _FcLangSet {
FcStrSet *extra;
};
#define FcLangSetBitSet(ls, id) ((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
#define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)
#define FcLangSetBitSet(ls, id) ((ls)->map[(fcLangCharSetIndices[id])>>5] |= ((FcChar32) 1 << ((fcLangCharSetIndices[id]) & 0x1f)))
#define FcLangSetBitGet(ls, id) (((ls)->map[(fcLangCharSetIndices[id])>>5] >> ((fcLangCharSetIndices[id]) & 0x1f)) & 1)
FcLangSet *
FcFreeTypeLangSet (const FcCharSet *charset,