diff --git a/src/hb-fallback-shape.cc b/src/hb-fallback-shape.cc index d7bde090d..3f9024feb 100644 --- a/src/hb-fallback-shape.cc +++ b/src/hb-fallback-shape.cc @@ -105,7 +105,7 @@ _hb_fallback_shape (hb_shape_plan_t *shape_plan, for (unsigned int i = 0; i < count; i++) { - if (buffer->unicode->is_zero_width (buffer->info[i].codepoint)) { + if (buffer->unicode->is_default_ignorable (buffer->info[i].codepoint)) { buffer->info[i].codepoint = space; buffer->pos[i].x_advance = 0; buffer->pos[i].y_advance = 0; diff --git a/src/hb-ot-shape-private.hh b/src/hb-ot-shape-private.hh index c4c368db7..ae01215f0 100644 --- a/src/hb-ot-shape-private.hh +++ b/src/hb-ot-shape-private.hh @@ -85,7 +85,7 @@ inline void _hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode) { info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) | - (unicode->is_zero_width (info->codepoint) ? 0x80 : 0); + (unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0); info->unicode_props1() = unicode->modified_combining_class (info->codepoint); } @@ -108,7 +108,7 @@ _hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info) } inline hb_bool_t -_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info) +_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info) { return !!(info->unicode_props0() & 0x80); } diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index 313766af3..9a6260a30 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -519,7 +519,7 @@ hb_ot_hide_zerowidth (hb_ot_shape_context_t *c) unsigned int count = c->buffer->len; for (unsigned int i = 0; i < count; i++) if (unlikely (!is_a_ligature (c->buffer->info[i]) && - _hb_glyph_info_is_zero_width (&c->buffer->info[i]))) + _hb_glyph_info_is_default_ignorable (&c->buffer->info[i]))) { if (!space) { /* We assume that the space glyph is not gid0. 
*/ diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh index 9f24a9fbc..7be4b04da 100644 --- a/src/hb-unicode-private.hh +++ b/src/hb-unicode-private.hh @@ -119,47 +119,73 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE 0xE0100, 0xE01EF)); /* VARIATION SELECTOR-17..256 */ } - /* Zero-Width invisible characters: + /* Default_Ignorable codepoints: * - * 00AD SOFT HYPHEN - * 034F COMBINING GRAPHEME JOINER + * Note that as of Oct 2012 (Unicode 6.2), U+180E MONGOLIAN VOWEL SEPARATOR + * is NOT Default_Ignorable, but it really behaves in a way that it should + * be. That has been reported to the Unicode Technical Committee for + * consideration. As such, we include it here, since Uniscribe removes it. * - * 180E MONGOLIAN VOWEL SEPARATOR + * Gathered from: + * http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:DI:]&abb=on&ucd=on&esc=on * - * 200B ZERO WIDTH SPACE - * 200C ZERO WIDTH NON-JOINER - * 200D ZERO WIDTH JOINER - * 200E LEFT-TO-RIGHT MARK - * 200F RIGHT-TO-LEFT MARK + * Last updated to the page with the following versions: + * Version 3.6; ICU version: 50.0.1.0; Unicode version: 6.1.0.0 * - * 2028 LINE SEPARATOR + * 4,167 Code Points * - * 202A LEFT-TO-RIGHT EMBEDDING - * 202B RIGHT-TO-LEFT EMBEDDING - * 202C POP DIRECTIONAL FORMATTING - * 202D LEFT-TO-RIGHT OVERRIDE - * 202E RIGHT-TO-LEFT OVERRIDE + * [\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\u200B-\u200F\u202A-\u202E\u2060-\u206F\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001D173-\U0001D17A\U000E0000-\U000E0FFF] * - * 2060 WORD JOINER - * 2061 FUNCTION APPLICATION - * 2062 INVISIBLE TIMES - * 2063 INVISIBLE SEPARATOR - * - * FEFF ZERO WIDTH NO-BREAK SPACE + * 00AD ;SOFT HYPHEN + * 034F ;COMBINING GRAPHEME JOINER + * 115F ;HANGUL CHOSEONG FILLER + * 1160 ;HANGUL JUNGSEONG FILLER + * 17B4 ;KHMER VOWEL INHERENT AQ + * 17B5 ;KHMER VOWEL INHERENT AA + * 180B..180D ;MONGOLIAN FREE VARIATION SELECTOR ONE..THREE (the code below also matches 180E, per the note above) + * 200B..200F ;ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK + * 202A..202E ;LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE + * 
2060..206F ;WORD JOINER..NOMINAL DIGIT SHAPES + * 3164 ;HANGUL FILLER + * FE00..FE0F ;VARIATION SELECTOR-1..VARIATION SELECTOR-16 + * FEFF ;ZERO WIDTH NO-BREAK SPACE + * FFA0 ;HALFWIDTH HANGUL FILLER + * FFF0..FFF8 ;(unassigned code points) + * 1D173..1D17A ;MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE + * E0000..E0FFF ;(Tags, Variation Selectors Supplement, and unassigned code points) */ inline hb_bool_t - is_zero_width (hb_codepoint_t ch) + is_default_ignorable (hb_codepoint_t ch) { - return ((ch & ~0x007F) == 0x2000 && (hb_in_ranges (ch, - 0x200B, 0x200F, - 0x202A, 0x202E, - 0x2060, 0x2064) || - (ch == 0x2028))) || - unlikely (ch == 0x0009 || - ch == 0x00AD || - ch == 0x034F || - ch == 0x180E || - ch == 0xFEFF); + hb_codepoint_t plane = ch >> 16; + if (likely (plane == 0)) + { + /* BMP */ + hb_codepoint_t page = ch >> 8; + switch (page) { + case 0x00: return unlikely (ch == 0x00AD); + case 0x03: return unlikely (ch == 0x034F); + case 0x11: return hb_in_range (ch, 0x115F, 0x1160); + case 0x17: return hb_in_range (ch, 0x17B4, 0x17B5); + case 0x18: return hb_in_range (ch, 0x180B, 0x180E); + case 0x20: return hb_in_ranges (ch, 0x200B, 0x200F, + 0x202A, 0x202E, + 0x2060, 0x206F); + case 0x31: return unlikely (ch == 0x3164); + case 0xFE: return hb_in_range (ch, 0xFE00, 0xFE0F) || ch == 0xFEFF; + case 0xFF: return hb_in_range (ch, 0xFFF0, 0xFFF8) || ch == 0xFFA0; + default: return false; + } + } + else + { + /* Other planes */ + switch (plane) { + case 0x01: return hb_in_range (ch, 0x0001D173, 0x0001D17A); + case 0x0E: return hb_in_range (ch, 0x000E0000, 0x000E0FFF); + default: return false; + } + } }