Guess that Mac Roman names with many high-bit bytes are actually SJIS.
Many Japanese fonts incorrectly include names that are tagged with the Roman encoding and the English language but are actually Japanese names in the SJIS encoding. Guess that names in which a large fraction of the bytes have the high bit set are SJIS-encoded Japanese names rather than English names.
This commit is contained in:
parent
db970d3596
commit
7295c6f5fa
|
@ -560,6 +560,28 @@ FcFontCapabilities(FT_Face face);
|
|||
#include <iconv.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A shift-JIS will have many high bits turned on
|
||||
*/
|
||||
static FcBool
|
||||
FcLooksLikeSJIS (FcChar8 *string, int len)
|
||||
{
|
||||
int nhigh = 0, nlow = 0;
|
||||
|
||||
while (len-- > 0)
|
||||
{
|
||||
if (*string++ & 0x80) nhigh++;
|
||||
else nlow++;
|
||||
}
|
||||
/*
|
||||
* Heuristic -- if more than 1/3 of the bytes have the high-bit set,
|
||||
* this is likely to be SJIS and not ROMAN
|
||||
*/
|
||||
if (nhigh * 2 > nlow)
|
||||
return FcTrue;
|
||||
return FcFalse;
|
||||
}
|
||||
|
||||
static FcChar8 *
|
||||
FcSfntNameTranscode (FT_SfntName *sname)
|
||||
{
|
||||
|
@ -579,14 +601,24 @@ FcSfntNameTranscode (FT_SfntName *sname)
|
|||
return 0;
|
||||
fromcode = fcFtEncoding[i].fromcode;
|
||||
|
||||
/*
|
||||
* Many names encoded for TT_PLATFORM_MACINTOSH are broken
|
||||
* in various ways. Kludge around them.
|
||||
*/
|
||||
if (!strcmp (fromcode, FC_ENCODING_MAC_ROMAN))
|
||||
{
|
||||
if (sname->language_id == TT_MAC_LANGID_ENGLISH &&
|
||||
FcLooksLikeSJIS (sname->string, sname->string_len))
|
||||
{
|
||||
fromcode = "SJIS";
|
||||
}
|
||||
else if (sname->language_id >= 0x100)
|
||||
{
|
||||
/*
|
||||
* "real" Mac language IDs are all less than 150.
|
||||
* Names using one of the MS language IDs are assumed
|
||||
* to use an associated encoding (Yes, this is a kludge)
|
||||
*/
|
||||
if (!strcmp (fromcode, FC_ENCODING_MAC_ROMAN) &&
|
||||
sname->language_id >= 0x100)
|
||||
{
|
||||
int f;
|
||||
|
||||
fromcode = NULL;
|
||||
|
@ -599,6 +631,7 @@ FcSfntNameTranscode (FT_SfntName *sname)
|
|||
if (!fromcode)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (!strcmp (fromcode, "UCS-2BE") || !strcmp (fromcode, "UTF-16BE"))
|
||||
{
|
||||
FcChar8 *src = sname->string;
|
||||
|
@ -738,10 +771,24 @@ static const FcChar8 *
|
|||
FcSfntNameLanguage (FT_SfntName *sname)
|
||||
{
|
||||
int i;
|
||||
FT_UShort platform_id = sname->platform_id;
|
||||
FT_UShort language_id = sname->language_id;
|
||||
|
||||
/*
|
||||
* Many names encoded for TT_PLATFORM_MACINTOSH are broken
|
||||
* in various ways. Kludge around them.
|
||||
*/
|
||||
if (platform_id == TT_PLATFORM_MACINTOSH &&
|
||||
sname->encoding_id == TT_MAC_ID_ROMAN &&
|
||||
FcLooksLikeSJIS (sname->string, sname->string_len))
|
||||
{
|
||||
language_id = TT_MAC_LANGID_JAPANESE;
|
||||
}
|
||||
|
||||
for (i = 0; i < NUM_FC_FT_LANGUAGE; i++)
|
||||
if (fcFtLanguage[i].platform_id == sname->platform_id &&
|
||||
if (fcFtLanguage[i].platform_id == platform_id &&
|
||||
(fcFtLanguage[i].language_id == TT_LANGUAGE_DONT_CARE ||
|
||||
fcFtLanguage[i].language_id == sname->language_id))
|
||||
fcFtLanguage[i].language_id == language_id))
|
||||
{
|
||||
if (fcFtLanguage[i].lang[0] == '\0')
|
||||
return NULL;
|
||||
|
|
Loading…
Reference in New Issue