Add Unicode space category

Unused so far.
This commit is contained in:
Behdad Esfahbod 2015-11-04 14:18:39 -08:00
parent 8249ec3f86
commit 9ac4b9656d
1 changed files with 76 additions and 2 deletions

View File

@ -213,6 +213,23 @@ _next_syllable (hb_buffer_t *buffer, unsigned int start)
/* unicode_props */
/* Design:
* unicode_props() is a two-byte number. The low byte includes:
* - General_Category: 5 bits.
* - A bit each for:
* * Is it Default_Ignorable(); we have a modified Default_Ignorable().
* * Is it U+200D ZWJ?
* * Is it U+200C ZWNJ?
*
* The high-byte has different meanings, switched by the Gen-Cat:
* - For Mn,Mc,Me: the modified Combining_Class.
* - For Ws: index of which space character this is, if space fallback
* is needed, ie. we don't set this by default, only if asked to.
*
* If needed, we can use the ZWJ/ZWNJ to use the high byte as well,
* freeing two more bits.
*/
enum {
UPROPS_MASK_ZWJ = 0x20u,
UPROPS_MASK_ZWNJ = 0x40u,
@ -220,6 +237,23 @@ enum {
UPROPS_MASK_GEN_CAT = 0x1Fu
};
enum space_t {
SPACE = 0,
SPACE_NBSP,
SPACE_EN,
SPACE_EM,
SPACE_EM_3,
SPACE_EM_4,
SPACE_EM_6,
SPACE_FIGURE,
SPACE_PUNCTUATION,
SPACE_THIN,
SPACE_HAIR,
SPACE_NARROW,
SPACE_MEDIUM,
SPACE_IDEOGRAPHIC,
};
static inline void
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
@ -246,6 +280,9 @@ _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *uni
* property value 0.
* 1.1.5+
* """
*
* Also, all Mn's that are Default_Ignorable, have ccc=0, hence
* the "else if".
*/
props |= unicode->modified_combining_class (info->codepoint)<<8;
}
@ -273,7 +310,6 @@ _hb_glyph_info_is_unicode_mark (const hb_glyph_info_t *info)
{
return HB_UNICODE_GENERAL_CATEGORY_IS_MARK (info->unicode_props() & UPROPS_MASK_GEN_CAT);
}
static inline void
_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
unsigned int modified_class)
@ -282,13 +318,51 @@ _hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info,
return;
info->unicode_props() = (modified_class<<8) | (info->unicode_props() & 0xFF);
}
static inline unsigned int
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
{
return _hb_glyph_info_is_unicode_mark (info) ? info->unicode_props()>>8 : 0;
}
static inline bool
_hb_glyph_info_is_unicode_space (const hb_glyph_info_t *info)
{
return _hb_glyph_info_get_general_category (info) ==
HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR;
}
static inline void
_hb_glyph_info_set_unicode_space_for_char (hb_glyph_info_t *info, hb_codepoint_t u)
{
if (unlikely (!_hb_glyph_info_is_unicode_space (info)))
return;
space_t s;
switch (u)
{
default: s = SPACE; break; /* Shouldn't happen. */
case 0x00A0u: s = SPACE_NBSP; break;
case 0x2002u: s = SPACE_EN; break;
case 0x2003u: s = SPACE_EM; break;
case 0x2004u: s = SPACE_EM_3; break;
case 0x2005u: s = SPACE_EM_4; break;
case 0x2006u: s = SPACE_EM_6; break;
case 0x2007u: s = SPACE_FIGURE; break;
case 0x2008u: s = SPACE_PUNCTUATION;break;
case 0x2009u: s = SPACE_THIN; break;
case 0x200Au: s = SPACE_HAIR; break;
case 0x202Fu: s = SPACE_NARROW; break;
case 0x205Fu: s = SPACE_MEDIUM; break;
case 0x3000u: s = SPACE_IDEOGRAPHIC;break;
}
info->unicode_props() = (((unsigned int) s)<<8) | (info->unicode_props() & 0xFF);
}
static inline space_t
_hb_glyph_info_get_unicode_space (const hb_glyph_info_t *info)
{
return _hb_glyph_info_is_unicode_space (info) ? (space_t) (info->unicode_props()>>8) : SPACE;
}
static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info);
static inline hb_bool_t