Rename and revamp is_zero_width() to be is_default_ignorable()
That's really the logic desired, except that U+180E MONGOLIAN VOWEL SEPARATOR is not Default_Ignorable in Unicode even though it really should be, so it is included here anyway. This has been reported to Unicode. Based on a suggestion from Konstantin Ritt.
This commit is contained in:
parent
a724139e64
commit
cf3afd8979
|
@ -105,7 +105,7 @@ _hb_fallback_shape (hb_shape_plan_t *shape_plan,
|
|||
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
{
|
||||
if (buffer->unicode->is_zero_width (buffer->info[i].codepoint)) {
|
||||
if (buffer->unicode->is_default_ignorable (buffer->info[i].codepoint)) {
|
||||
buffer->info[i].codepoint = space;
|
||||
buffer->pos[i].x_advance = 0;
|
||||
buffer->pos[i].y_advance = 0;
|
||||
|
|
|
@ -85,7 +85,7 @@ inline void
|
|||
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
|
||||
{
|
||||
info->unicode_props0() = ((unsigned int) unicode->general_category (info->codepoint)) |
|
||||
(unicode->is_zero_width (info->codepoint) ? 0x80 : 0);
|
||||
(unicode->is_default_ignorable (info->codepoint) ? 0x80 : 0);
|
||||
info->unicode_props1() = unicode->modified_combining_class (info->codepoint);
|
||||
}
|
||||
|
||||
|
@ -108,7 +108,7 @@ _hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
|
|||
}
|
||||
|
||||
inline hb_bool_t
|
||||
_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info)
|
||||
_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info)
|
||||
{
|
||||
return !!(info->unicode_props0() & 0x80);
|
||||
}
|
||||
|
|
|
@ -519,7 +519,7 @@ hb_ot_hide_zerowidth (hb_ot_shape_context_t *c)
|
|||
unsigned int count = c->buffer->len;
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
if (unlikely (!is_a_ligature (c->buffer->info[i]) &&
|
||||
_hb_glyph_info_is_zero_width (&c->buffer->info[i])))
|
||||
_hb_glyph_info_is_default_ignorable (&c->buffer->info[i])))
|
||||
{
|
||||
if (!space) {
|
||||
/* We assume that the space glyph is not gid0. */
|
||||
|
|
|
@ -119,47 +119,73 @@ HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
|
|||
0xE0100, 0xE01EF)); /* VARIATION SELECTOR-17..256 */
|
||||
}
|
||||
|
||||
/* Zero-Width invisible characters:
|
||||
/* Default_Ignorable codepoints:
|
||||
*
|
||||
* 00AD SOFT HYPHEN
|
||||
* 034F COMBINING GRAPHEME JOINER
|
||||
* Note that as of Oct 2012 (Unicode 6.2), U+180E MONGOLIAN VOWEL SEPARATOR
|
||||
* is NOT Default_Ignorable, but it really behaves in a way that it should
|
||||
* be. That has been reported to the Unicode Technical Committee for
|
||||
* consideration. As such, we include it here, since Uniscribe removes it.
|
||||
*
|
||||
* 180E MONGOLIAN VOWEL SEPARATOR
|
||||
* Gathered from:
|
||||
* http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:DI:]&abb=on&ucd=on&esc=on
|
||||
*
|
||||
* 200B ZERO WIDTH SPACE
|
||||
* 200C ZERO WIDTH NON-JOINER
|
||||
* 200D ZERO WIDTH JOINER
|
||||
* 200E LEFT-TO-RIGHT MARK
|
||||
* 200F RIGHT-TO-LEFT MARK
|
||||
* Last updated to the page with the following versions:
|
||||
* Version 3.6; ICU version: 50.0.1.0; Unicode version: 6.1.0.0
|
||||
*
|
||||
* 2028 LINE SEPARATOR
|
||||
* 4,167 Code Points
|
||||
*
|
||||
* 202A LEFT-TO-RIGHT EMBEDDING
|
||||
* 202B RIGHT-TO-LEFT EMBEDDING
|
||||
* 202C POP DIRECTIONAL FORMATTING
|
||||
* 202D LEFT-TO-RIGHT OVERRIDE
|
||||
* 202E RIGHT-TO-LEFT OVERRIDE
|
||||
* [\u00AD\u034F\u115F\u1160\u17B4\u17B5\u180B-\u180D\u200B-\u200F\u202A-\u202E\u2060-\u206F\u3164\uFE00-\uFE0F\uFEFF\uFFA0\uFFF0-\uFFF8\U0001D173-\U0001D17A\U000E0000-\U000E0FFF]
|
||||
*
|
||||
* 2060 WORD JOINER
|
||||
* 2061 FUNCTION APPLICATION
|
||||
* 2062 INVISIBLE TIMES
|
||||
* 2063 INVISIBLE SEPARATOR
|
||||
*
|
||||
* FEFF ZERO WIDTH NO-BREAK SPACE
|
||||
* 00AD ;SOFT HYPHEN
|
||||
* 034F ;COMBINING GRAPHEME JOINER
|
||||
* 115F ;HANGUL CHOSEONG FILLER
|
||||
* 1160 ;HANGUL JUNGSEONG FILLER
|
||||
* 17B4 ;KHMER VOWEL INHERENT AQ
|
||||
* 17B5 ;KHMER VOWEL INHERENT AA
|
||||
* 180B..180D ;MONGOLIAN FREE VARIATION SELECTOR THREE
|
||||
* 200B..200F ;RIGHT-TO-LEFT MARK
|
||||
* 202A..202E ;RIGHT-TO-LEFT OVERRIDE
|
||||
* 2060..206F ;NOMINAL DIGIT SHAPES
|
||||
* 3164 ;HANGUL FILLER
|
||||
* FE00..FE0F ;VARIATION SELECTOR-16
|
||||
* FEFF ;ZERO WIDTH NO-BREAK SPACE
|
||||
* FFA0 ;HALFWIDTH HANGUL FILLER
|
||||
* FFF0..FFF8 ;<unassigned-FFF8>
|
||||
* 1D173..1D17A ;MUSICAL SYMBOL END PHRASE
|
||||
* E0000..E0FFF ;<unassigned-E0FFF>
|
||||
*/
|
||||
inline hb_bool_t
|
||||
is_zero_width (hb_codepoint_t ch)
|
||||
is_default_ignorable (hb_codepoint_t ch)
|
||||
{
|
||||
return ((ch & ~0x007F) == 0x2000 && (hb_in_ranges<hb_codepoint_t> (ch,
|
||||
0x200B, 0x200F,
|
||||
0x202A, 0x202E,
|
||||
0x2060, 0x2064) ||
|
||||
(ch == 0x2028))) ||
|
||||
unlikely (ch == 0x0009 ||
|
||||
ch == 0x00AD ||
|
||||
ch == 0x034F ||
|
||||
ch == 0x180E ||
|
||||
ch == 0xFEFF);
|
||||
hb_codepoint_t plane = ch >> 16;
|
||||
if (likely (plane == 0))
|
||||
{
|
||||
/* BMP */
|
||||
hb_codepoint_t page = ch >> 8;
|
||||
switch (page) {
|
||||
case 0x00: return unlikely (ch == 0x00AD);
|
||||
case 0x03: return unlikely (ch == 0x034F);
|
||||
case 0x11: return hb_in_range<hb_codepoint_t> (ch, 0x115F, 0x1160);
|
||||
case 0x17: return hb_in_range<hb_codepoint_t> (ch, 0x17B4, 0x17B5);
|
||||
case 0x18: return hb_in_range<hb_codepoint_t> (ch, 0x180B, 0x180E);
|
||||
case 0x20: return hb_in_ranges<hb_codepoint_t> (ch, 0x200B, 0x200F,
|
||||
0x202A, 0x202E,
|
||||
0x2060, 0x206F);
|
||||
case 0x31: return unlikely (ch == 0x3164);
|
||||
case 0xFE: return hb_in_range<hb_codepoint_t> (ch, 0xFE00, 0xFE0F) || ch == 0xFEFF;
|
||||
case 0xFF: return hb_in_range<hb_codepoint_t> (ch, 0xFFF0, 0xFFF8) || ch == 0xFFA0;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Other planes */
|
||||
switch (plane) {
|
||||
case 0x01: return hb_in_range<hb_codepoint_t> (ch, 0x0001D173, 0x0001D17A);
|
||||
case 0x0E: return hb_in_range<hb_codepoint_t> (ch, 0x000E0000, 0x000E0FFF);
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue