Add script to/from ISO 15924 tag support

Also adds --script support to hb-view.

If a script tag is not known to us, we pass the ISO 15924 tag around.
Right now, the OT layer ignores that, but we can fix it to blindly
convert that to an OT script tag.
This commit is contained in:
Behdad Esfahbod 2011-03-16 17:36:32 -03:00
parent 3286fc0e9a
commit 5d91c3d547
4 changed files with 293 additions and 2 deletions

View File

@ -152,6 +152,8 @@ hb_ot_tags_from_script (hb_script_t script)
{
static const hb_tag_t def_tag[] = {HB_OT_TAG_DEFAULT_SCRIPT, HB_TAG_NONE};
/* XXX Handle non-enum scripts */
if (unlikely ((unsigned int) script >= ARRAY_LENGTH (ot_scripts)))
return def_tag;
@ -170,6 +172,8 @@ hb_ot_tag_to_script (hb_tag_t tag)
return i;
}
/* XXX Convert to non-enum scripts */
return HB_SCRIPT_UNKNOWN;
}

View File

@ -234,6 +234,279 @@ hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
}
/* hb_script_t */
/* Lookup table from script to four-letter script tag.
 *
 * hb_script_to_iso15924_tag() indexes this array directly with the
 * hb_script_t value, so the position of every entry is significant: entry N
 * must be the tag for the script whose enum value is N.  The trailing comment
 * on each line names the enumerator that slot is meant for.
 * NOTE(review): the hb_script_t enum is declared elsewhere -- when adding a
 * script, confirm the order here still matches the enum exactly.
 *
 * NOTE(review): Coptic and Inherited are emitted as 'Qaac' / 'Qaai' rather
 * than 'Copt' / 'Zinh' -- confirm this is what consumers of the tag expect.
 */
static const hb_tag_t script_to_iso15924_tag[] =
{
HB_TAG('Z','y','y','y'), /* HB_SCRIPT_COMMON */
HB_TAG('Q','a','a','i'), /* HB_SCRIPT_INHERITED */
HB_TAG('A','r','a','b'), /* HB_SCRIPT_ARABIC */
HB_TAG('A','r','m','n'), /* HB_SCRIPT_ARMENIAN */
HB_TAG('B','e','n','g'), /* HB_SCRIPT_BENGALI */
HB_TAG('B','o','p','o'), /* HB_SCRIPT_BOPOMOFO */
HB_TAG('C','h','e','r'), /* HB_SCRIPT_CHEROKEE */
HB_TAG('Q','a','a','c'), /* HB_SCRIPT_COPTIC */
HB_TAG('C','y','r','l'), /* HB_SCRIPT_CYRILLIC */
HB_TAG('D','s','r','t'), /* HB_SCRIPT_DESERET */
HB_TAG('D','e','v','a'), /* HB_SCRIPT_DEVANAGARI */
HB_TAG('E','t','h','i'), /* HB_SCRIPT_ETHIOPIC */
HB_TAG('G','e','o','r'), /* HB_SCRIPT_GEORGIAN */
HB_TAG('G','o','t','h'), /* HB_SCRIPT_GOTHIC */
HB_TAG('G','r','e','k'), /* HB_SCRIPT_GREEK */
HB_TAG('G','u','j','r'), /* HB_SCRIPT_GUJARATI */
HB_TAG('G','u','r','u'), /* HB_SCRIPT_GURMUKHI */
HB_TAG('H','a','n','i'), /* HB_SCRIPT_HAN */
HB_TAG('H','a','n','g'), /* HB_SCRIPT_HANGUL */
HB_TAG('H','e','b','r'), /* HB_SCRIPT_HEBREW */
HB_TAG('H','i','r','a'), /* HB_SCRIPT_HIRAGANA */
HB_TAG('K','n','d','a'), /* HB_SCRIPT_KANNADA */
HB_TAG('K','a','n','a'), /* HB_SCRIPT_KATAKANA */
HB_TAG('K','h','m','r'), /* HB_SCRIPT_KHMER */
HB_TAG('L','a','o','o'), /* HB_SCRIPT_LAO */
HB_TAG('L','a','t','n'), /* HB_SCRIPT_LATIN */
HB_TAG('M','l','y','m'), /* HB_SCRIPT_MALAYALAM */
HB_TAG('M','o','n','g'), /* HB_SCRIPT_MONGOLIAN */
HB_TAG('M','y','m','r'), /* HB_SCRIPT_MYANMAR */
HB_TAG('O','g','a','m'), /* HB_SCRIPT_OGHAM */
HB_TAG('I','t','a','l'), /* HB_SCRIPT_OLD_ITALIC */
HB_TAG('O','r','y','a'), /* HB_SCRIPT_ORIYA */
HB_TAG('R','u','n','r'), /* HB_SCRIPT_RUNIC */
HB_TAG('S','i','n','h'), /* HB_SCRIPT_SINHALA */
HB_TAG('S','y','r','c'), /* HB_SCRIPT_SYRIAC */
HB_TAG('T','a','m','l'), /* HB_SCRIPT_TAMIL */
HB_TAG('T','e','l','u'), /* HB_SCRIPT_TELUGU */
HB_TAG('T','h','a','a'), /* HB_SCRIPT_THAANA */
HB_TAG('T','h','a','i'), /* HB_SCRIPT_THAI */
HB_TAG('T','i','b','t'), /* HB_SCRIPT_TIBETAN */
HB_TAG('C','a','n','s'), /* HB_SCRIPT_CANADIAN_ABORIGINAL */
HB_TAG('Y','i','i','i'), /* HB_SCRIPT_YI */
HB_TAG('T','g','l','g'), /* HB_SCRIPT_TAGALOG */
HB_TAG('H','a','n','o'), /* HB_SCRIPT_HANUNOO */
HB_TAG('B','u','h','d'), /* HB_SCRIPT_BUHID */
HB_TAG('T','a','g','b'), /* HB_SCRIPT_TAGBANWA */
/* Unicode-4.0 additions */
HB_TAG('B','r','a','i'), /* HB_SCRIPT_BRAILLE */
HB_TAG('C','p','r','t'), /* HB_SCRIPT_CYPRIOT */
HB_TAG('L','i','m','b'), /* HB_SCRIPT_LIMBU */
HB_TAG('O','s','m','a'), /* HB_SCRIPT_OSMANYA */
HB_TAG('S','h','a','w'), /* HB_SCRIPT_SHAVIAN */
HB_TAG('L','i','n','b'), /* HB_SCRIPT_LINEAR_B */
HB_TAG('T','a','l','e'), /* HB_SCRIPT_TAI_LE */
HB_TAG('U','g','a','r'), /* HB_SCRIPT_UGARITIC */
/* Unicode-4.1 additions */
HB_TAG('T','a','l','u'), /* HB_SCRIPT_NEW_TAI_LUE */
HB_TAG('B','u','g','i'), /* HB_SCRIPT_BUGINESE */
HB_TAG('G','l','a','g'), /* HB_SCRIPT_GLAGOLITIC */
HB_TAG('T','f','n','g'), /* HB_SCRIPT_TIFINAGH */
HB_TAG('S','y','l','o'), /* HB_SCRIPT_SYLOTI_NAGRI */
HB_TAG('X','p','e','o'), /* HB_SCRIPT_OLD_PERSIAN */
HB_TAG('K','h','a','r'), /* HB_SCRIPT_KHAROSHTHI */
/* Unicode-5.0 additions */
HB_TAG('Z','z','z','z'), /* HB_SCRIPT_UNKNOWN */
HB_TAG('B','a','l','i'), /* HB_SCRIPT_BALINESE */
HB_TAG('X','s','u','x'), /* HB_SCRIPT_CUNEIFORM */
HB_TAG('P','h','n','x'), /* HB_SCRIPT_PHOENICIAN */
HB_TAG('P','h','a','g'), /* HB_SCRIPT_PHAGS_PA */
HB_TAG('N','k','o','o'), /* HB_SCRIPT_NKO */
/* Unicode-5.1 additions */
HB_TAG('K','a','l','i'), /* HB_SCRIPT_KAYAH_LI */
HB_TAG('L','e','p','c'), /* HB_SCRIPT_LEPCHA */
HB_TAG('R','j','n','g'), /* HB_SCRIPT_REJANG */
HB_TAG('S','u','n','d'), /* HB_SCRIPT_SUNDANESE */
HB_TAG('S','a','u','r'), /* HB_SCRIPT_SAURASHTRA */
HB_TAG('C','h','a','m'), /* HB_SCRIPT_CHAM */
HB_TAG('O','l','c','k'), /* HB_SCRIPT_OL_CHIKI */
HB_TAG('V','a','i','i'), /* HB_SCRIPT_VAI */
HB_TAG('C','a','r','i'), /* HB_SCRIPT_CARIAN */
HB_TAG('L','y','c','i'), /* HB_SCRIPT_LYCIAN */
HB_TAG('L','y','d','i'), /* HB_SCRIPT_LYDIAN */
/* Unicode-5.2 additions */
HB_TAG('A','v','s','t'), /* HB_SCRIPT_AVESTAN */
HB_TAG('B','a','m','u'), /* HB_SCRIPT_BAMUM */
HB_TAG('E','g','y','p'), /* HB_SCRIPT_EGYPTIAN_HIEROGLYPHS */
HB_TAG('A','r','m','i'), /* HB_SCRIPT_IMPERIAL_ARAMAIC */
HB_TAG('P','h','l','i'), /* HB_SCRIPT_INSCRIPTIONAL_PAHLAVI */
HB_TAG('P','r','t','i'), /* HB_SCRIPT_INSCRIPTIONAL_PARTHIAN */
HB_TAG('J','a','v','a'), /* HB_SCRIPT_JAVANESE */
HB_TAG('K','t','h','i'), /* HB_SCRIPT_KAITHI */
HB_TAG('L','i','s','u'), /* HB_SCRIPT_LISU */
HB_TAG('M','t','e','i'), /* HB_SCRIPT_MEETEI_MAYEK */
HB_TAG('S','a','r','b'), /* HB_SCRIPT_OLD_SOUTH_ARABIAN */
HB_TAG('O','r','k','h'), /* HB_SCRIPT_OLD_TURKIC */
HB_TAG('S','a','m','r'), /* HB_SCRIPT_SAMARITAN */
HB_TAG('L','a','n','a'), /* HB_SCRIPT_TAI_THAM */
HB_TAG('T','a','v','t'), /* HB_SCRIPT_TAI_VIET */
/* Unicode-6.0 additions */
HB_TAG('B','a','t','k'), /* HB_SCRIPT_BATAK */
HB_TAG('B','r','a','h'), /* HB_SCRIPT_BRAHMI */
HB_TAG('M','a','n','d') /* HB_SCRIPT_MANDAIC */
};
/* One row of the tag -> script lookup table.
 *
 * `tag' must remain the first member: the bsearch() comparator receives a
 * pointer to a whole pair and reads the tag straight through that pointer. */
struct tag_script_pair {
hb_tag_t tag; /* four-letter script tag used as the search key */
hb_script_t script; /* script the tag resolves to */
};
static const struct tag_script_pair script_from_iso15924_tag[] =
{
{HB_TAG('A','r','a','b'), HB_SCRIPT_ARABIC},
{HB_TAG('A','r','m','i'), HB_SCRIPT_IMPERIAL_ARAMAIC},
{HB_TAG('A','r','m','n'), HB_SCRIPT_ARMENIAN},
{HB_TAG('A','v','s','t'), HB_SCRIPT_AVESTAN},
{HB_TAG('B','a','l','i'), HB_SCRIPT_BALINESE},
{HB_TAG('B','a','m','u'), HB_SCRIPT_BAMUM},
{HB_TAG('B','a','t','k'), HB_SCRIPT_BATAK},
{HB_TAG('B','e','n','g'), HB_SCRIPT_BENGALI},
{HB_TAG('B','o','p','o'), HB_SCRIPT_BOPOMOFO},
{HB_TAG('B','r','a','h'), HB_SCRIPT_BRAHMI},
{HB_TAG('B','r','a','i'), HB_SCRIPT_BRAILLE},
{HB_TAG('B','u','g','i'), HB_SCRIPT_BUGINESE},
{HB_TAG('B','u','h','d'), HB_SCRIPT_BUHID},
{HB_TAG('C','a','n','s'), HB_SCRIPT_CANADIAN_ABORIGINAL},
{HB_TAG('C','a','r','i'), HB_SCRIPT_CARIAN},
{HB_TAG('C','h','a','m'), HB_SCRIPT_CHAM},
{HB_TAG('C','h','e','r'), HB_SCRIPT_CHEROKEE},
{HB_TAG('C','p','r','t'), HB_SCRIPT_CYPRIOT},
{HB_TAG('C','y','r','l'), HB_SCRIPT_CYRILLIC},
{HB_TAG('C','y','r','s'), HB_SCRIPT_CYRILLIC},
{HB_TAG('D','e','v','a'), HB_SCRIPT_DEVANAGARI},
{HB_TAG('D','s','r','t'), HB_SCRIPT_DESERET},
{HB_TAG('E','g','y','p'), HB_SCRIPT_EGYPTIAN_HIEROGLYPHS},
{HB_TAG('E','t','h','i'), HB_SCRIPT_ETHIOPIC},
{HB_TAG('G','e','o','a'), HB_SCRIPT_GEORGIAN},
{HB_TAG('G','e','o','n'), HB_SCRIPT_GEORGIAN},
{HB_TAG('G','e','o','r'), HB_SCRIPT_GEORGIAN},
{HB_TAG('G','l','a','g'), HB_SCRIPT_GLAGOLITIC},
{HB_TAG('G','o','t','h'), HB_SCRIPT_GOTHIC},
{HB_TAG('G','r','e','k'), HB_SCRIPT_GREEK},
{HB_TAG('G','u','j','r'), HB_SCRIPT_GUJARATI},
{HB_TAG('G','u','r','u'), HB_SCRIPT_GURMUKHI},
{HB_TAG('H','a','n','g'), HB_SCRIPT_HANGUL},
{HB_TAG('H','a','n','i'), HB_SCRIPT_HAN},
{HB_TAG('H','a','n','o'), HB_SCRIPT_HANUNOO},
{HB_TAG('H','e','b','r'), HB_SCRIPT_HEBREW},
{HB_TAG('H','i','r','a'), HB_SCRIPT_HIRAGANA},
{HB_TAG('I','t','a','l'), HB_SCRIPT_OLD_ITALIC},
{HB_TAG('J','a','v','a'), HB_SCRIPT_JAVANESE},
{HB_TAG('K','a','l','i'), HB_SCRIPT_KAYAH_LI},
{HB_TAG('K','a','n','a'), HB_SCRIPT_KATAKANA},
{HB_TAG('K','h','a','r'), HB_SCRIPT_KHAROSHTHI},
{HB_TAG('K','h','m','r'), HB_SCRIPT_KHMER},
{HB_TAG('K','n','d','a'), HB_SCRIPT_KANNADA},
{HB_TAG('K','t','h','i'), HB_SCRIPT_KAITHI},
{HB_TAG('L','a','n','a'), HB_SCRIPT_TAI_THAM},
{HB_TAG('L','a','o','o'), HB_SCRIPT_LAO},
{HB_TAG('L','a','t','f'), HB_SCRIPT_LATIN},
{HB_TAG('L','a','t','g'), HB_SCRIPT_LATIN},
{HB_TAG('L','a','t','n'), HB_SCRIPT_LATIN},
{HB_TAG('L','e','p','c'), HB_SCRIPT_LEPCHA},
{HB_TAG('L','i','m','b'), HB_SCRIPT_LIMBU},
{HB_TAG('L','i','n','b'), HB_SCRIPT_LINEAR_B},
{HB_TAG('L','i','s','u'), HB_SCRIPT_LISU},
{HB_TAG('L','y','c','i'), HB_SCRIPT_LYCIAN},
{HB_TAG('L','y','d','i'), HB_SCRIPT_LYDIAN},
{HB_TAG('M','a','n','d'), HB_SCRIPT_MANDAIC},
{HB_TAG('M','l','y','m'), HB_SCRIPT_MALAYALAM},
{HB_TAG('M','o','n','g'), HB_SCRIPT_MONGOLIAN},
{HB_TAG('M','t','e','i'), HB_SCRIPT_MEETEI_MAYEK},
{HB_TAG('M','y','m','r'), HB_SCRIPT_MYANMAR},
{HB_TAG('N','k','o','o'), HB_SCRIPT_NKO},
{HB_TAG('O','g','a','m'), HB_SCRIPT_OGHAM},
{HB_TAG('O','l','c','k'), HB_SCRIPT_OL_CHIKI},
{HB_TAG('O','r','k','h'), HB_SCRIPT_OLD_TURKIC},
{HB_TAG('O','r','y','a'), HB_SCRIPT_ORIYA},
{HB_TAG('O','s','m','a'), HB_SCRIPT_OSMANYA},
{HB_TAG('P','h','a','g'), HB_SCRIPT_PHAGS_PA},
{HB_TAG('P','h','l','i'), HB_SCRIPT_INSCRIPTIONAL_PAHLAVI},
{HB_TAG('P','h','n','x'), HB_SCRIPT_PHOENICIAN},
{HB_TAG('P','r','t','i'), HB_SCRIPT_INSCRIPTIONAL_PARTHIAN},
{HB_TAG('Q','a','a','c'), HB_SCRIPT_COPTIC},
{HB_TAG('Q','a','a','i'), HB_SCRIPT_INHERITED},
{HB_TAG('R','j','n','g'), HB_SCRIPT_REJANG},
{HB_TAG('R','u','n','r'), HB_SCRIPT_RUNIC},
{HB_TAG('S','a','m','r'), HB_SCRIPT_SAMARITAN},
{HB_TAG('S','a','r','b'), HB_SCRIPT_OLD_SOUTH_ARABIAN},
{HB_TAG('S','a','u','r'), HB_SCRIPT_SAURASHTRA},
{HB_TAG('S','h','a','w'), HB_SCRIPT_SHAVIAN},
{HB_TAG('S','i','n','h'), HB_SCRIPT_SINHALA},
{HB_TAG('S','u','n','d'), HB_SCRIPT_SUNDANESE},
{HB_TAG('S','y','l','o'), HB_SCRIPT_SYLOTI_NAGRI},
{HB_TAG('S','y','r','c'), HB_SCRIPT_SYRIAC},
{HB_TAG('S','y','r','e'), HB_SCRIPT_SYRIAC},
{HB_TAG('S','y','r','n'), HB_SCRIPT_SYRIAC},
{HB_TAG('T','a','g','b'), HB_SCRIPT_TAGBANWA},
{HB_TAG('T','a','l','e'), HB_SCRIPT_TAI_LE},
{HB_TAG('T','a','l','u'), HB_SCRIPT_NEW_TAI_LUE},
{HB_TAG('T','a','m','l'), HB_SCRIPT_TAMIL},
{HB_TAG('T','a','v','t'), HB_SCRIPT_TAI_VIET},
{HB_TAG('T','e','l','u'), HB_SCRIPT_TELUGU},
{HB_TAG('T','f','n','g'), HB_SCRIPT_TIFINAGH},
{HB_TAG('T','g','l','g'), HB_SCRIPT_TAGALOG},
{HB_TAG('T','h','a','a'), HB_SCRIPT_THAANA},
{HB_TAG('T','h','a','i'), HB_SCRIPT_THAI},
{HB_TAG('T','i','b','t'), HB_SCRIPT_TIBETAN},
{HB_TAG('U','g','a','r'), HB_SCRIPT_UGARITIC},
{HB_TAG('V','a','i','i'), HB_SCRIPT_VAI},
{HB_TAG('X','p','e','o'), HB_SCRIPT_OLD_PERSIAN},
{HB_TAG('X','s','u','x'), HB_SCRIPT_CUNEIFORM},
{HB_TAG('Y','i','i','i'), HB_SCRIPT_YI},
{HB_TAG('Z','y','y','y'), HB_SCRIPT_COMMON},
{HB_TAG('Z','z','z','z'), HB_SCRIPT_UNKNOWN}
};
/* bsearch() comparator: orders two tags by their numeric value.
 *
 * Declared with the exact `const void *' signature bsearch() expects, so it
 * is called through a compatible function pointer type (calling it through
 * an incompatible one is undefined behavior in C).
 *
 * pa: pointer to the search key, an hb_tag_t.
 * pb: pointer to a table element.  Only its leading hb_tag_t is read, which
 *     is why the tag must be the first member of struct tag_script_pair.
 *
 * Returns -1, 0 or +1 when *pa is respectively less than, equal to or
 * greater than *pb.
 */
static int
_tag_cmp (const void *pa, const void *pb)
{
  const hb_tag_t a = *(const hb_tag_t *) pa;
  const hb_tag_t b = *(const hb_tag_t *) pb;

  return a < b ? -1 : a == b ? 0 : +1;
}
/* Resolves an ISO 15924 script tag to an hb_script_t.
 *
 * The tag's letter case is normalized before lookup, so callers may pass any
 * capitalization.  Resolution order:
 *   1. a tag present in script_from_iso15924_tag yields its mapped script;
 *   2. a tag that is absent but has the expected bit pattern (first byte in
 *      the capital-letter range, remaining three in the small-letter range)
 *      is handed back as is, reinterpreted as a script value;
 *   3. anything else yields HB_SCRIPT_UNKNOWN.
 */
hb_script_t
hb_script_from_iso15924_tag (hb_tag_t tag)
{
  /* Clear the case bit of every byte, then set it again on the last three:
   * one capital letter followed by three small ones. */
  const hb_tag_t canonical = (tag & 0xDFDFDFDF) | 0x00202020;

  const struct tag_script_pair *hit =
    (const struct tag_script_pair *) bsearch (&canonical,
					      script_from_iso15924_tag,
					      ARRAY_LENGTH (script_from_iso15924_tag),
					      sizeof (script_from_iso15924_tag[0]),
					      (hb_compare_func_t) _tag_cmp);

  if (hit)
    return hit->script;

  /* Not in the table: reject anything that does not even look like a tag. */
  if (((uint32_t) canonical & 0xE0E0E0E0) != 0x40606060)
    return HB_SCRIPT_UNKNOWN;

  /* Well-formed but unrecognized: carry the tag itself around as the script. */
  return (hb_script_t) canonical;
}
/* Produces the four-letter tag for a script.
 *
 * Script values that fall inside script_to_iso15924_tag are translated
 * through that table.  Values outside it are assumed to be tags that were
 * passed through unchanged earlier: when they have the tag bit pattern (one
 * capital letter followed by three small letters) they are returned verbatim.
 * Everything else maps to the tag stored for HB_SCRIPT_UNKNOWN.
 */
hb_tag_t
hb_script_to_iso15924_tag (hb_script_t script)
{
  const unsigned int slot = (unsigned int) script;
  hb_tag_t result;

  if (likely (slot < ARRAY_LENGTH (script_to_iso15924_tag)))
    result = script_to_iso15924_tag[slot];		/* regular enum script */
  else if (((uint32_t) script & 0xE0E0E0E0) == 0x40606060)
    result = (hb_tag_t) script;				/* tag-shaped value: pass through */
  else
    result = script_to_iso15924_tag[HB_SCRIPT_UNKNOWN];	/* no idea what this is */

  return result;
}
#define LTR HB_DIRECTION_LTR
#define RTL HB_DIRECTION_RTL

View File

@ -290,7 +290,13 @@ hb_unicode_get_eastasian_width (hb_unicode_funcs_t *ufuncs,
hb_codepoint_t unicode);
/* Misc functions */
/* Script functions */
/* Converts an ISO 15924 script tag to a script.  Letter case is normalized
 * first.  A known tag yields its script; an unknown tag shaped like one
 * capital letter followed by three small letters is returned as is, cast to
 * hb_script_t; anything else yields HB_SCRIPT_UNKNOWN. */
hb_script_t
hb_script_from_iso15924_tag (hb_tag_t tag);
/* Converts a script to its four-letter tag.  A script value outside the
 * built-in table is returned unchanged when it is shaped like a tag (one
 * capital letter followed by three small letters); otherwise the tag for
 * HB_SCRIPT_UNKNOWN is returned. */
hb_tag_t
hb_script_to_iso15924_tag (hb_script_t script);
hb_direction_t
hb_script_get_horizontal_direction (hb_script_t script);

View File

@ -55,6 +55,7 @@ static const char *text = NULL;
static const char *font_file = NULL;
static const char *out_file = "/dev/stdout";
static const char *language = NULL;
static const char *script = NULL;
/* Ugh, global vars. Ugly, but does the job */
static int width = 0;
@ -97,6 +98,7 @@ parse_opts (int argc, char **argv)
{"foreground", 1, 0, 'F'},
{"background", 1, 0, 'B'},
{"language", 1, 0, 'L'},
{"script", 1, 0, 'S'},
{"output", 1, 0, 'o'},
{0, 0, 0, 0}
};
@ -143,6 +145,9 @@ parse_opts (int argc, char **argv)
case 'L':
language = optarg;
break;
case 'S':
script = optarg;
break;
case 'o':
out_file = optarg;
break;
@ -184,7 +189,10 @@ _hb_cr_text_glyphs (cairo_t *cr,
hb_buffer_set_unicode_funcs (hb_buffer, hb_glib_get_unicode_funcs ());
hb_buffer_add_utf8 (hb_buffer, text, len, 0, len);
hb_buffer_set_script (hb_buffer, HB_SCRIPT_INVALID);
if (script)
hb_buffer_set_script (hb_buffer, hb_script_from_iso15924_tag (hb_tag_from_string (script)));
else
hb_buffer_set_script (hb_buffer, HB_SCRIPT_INVALID);
hb_buffer_set_direction (hb_buffer, HB_DIRECTION_INVALID);
hb_buffer_set_language (hb_buffer, hb_language_from_string (language));