diff --git a/src/hb-ot-shape-complex-arabic.cc b/src/hb-ot-shape-complex-arabic.cc index 880a6b987..746f2d7d1 100644 --- a/src/hb-ot-shape-complex-arabic.cc +++ b/src/hb-ot-shape-complex-arabic.cc @@ -25,6 +25,7 @@ */ #include "hb-ot-shape-complex-private.hh" +#include "hb-ot-shape-private.hh" @@ -248,7 +249,7 @@ _hb_ot_shape_complex_setup_masks_arabic (hb_ot_map_t *map, hb_buffer_t *buffer, for (unsigned int i = 0; i < count; i++) { - unsigned int this_type = get_joining_type (buffer->info[i].codepoint, (hb_unicode_general_category_t) buffer->info[i].general_category()); + unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i])); if (unlikely (this_type == JOINING_TYPE_T)) { buffer->info[i].arabic_shaping_action() = NONE; diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 6f14777ce..f198fba83 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -432,24 +432,6 @@ found_non_indic (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_ar #include "hb-ot-shape-complex-indic-machine.hh" -static void -remove_joiners (hb_buffer_t *buffer) -{ - /* For now we remove joiners. However, Uniscbire seems to keep them - * and output a zero-width space glyph for them. It is not clear to - * me how that is supposed to interact with GSUB. */ - - buffer->clear_output (); - unsigned int count = buffer->len; - for (buffer->idx = 0; buffer->idx < count;) - if (unlikely (is_joiner (buffer->info[buffer->idx]))) - buffer->skip_glyph (); - else - buffer->next_glyph (); - - buffer->swap_buffers (); -} - static void initial_reordering (const hb_ot_map_t *map, hb_face_t *face, @@ -462,8 +444,6 @@ initial_reordering (const hb_ot_map_t *map, mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); find_syllables (map, buffer, mask_array); - - remove_joiners (buffer); } static void diff --git a/src/hb-ot-shape-complex-private.hh b/src/hb-ot-shape-complex-private.hh index 38edaa0cd..3f99781f6 100644 --- a/src/hb-ot-shape-complex-private.hh +++ b/src/hb-ot-shape-complex-private.hh @@ -35,8 +35,8 @@ /* buffer var allocations, used during the entire shaping process */ -#define general_category() var1.u8[0] /* unicode general_category (hb_unicode_general_category_t) */ -#define combining_class() var1.u8[1] /* unicode combining_class (uint8_t) */ +#define unicode_props0() var1.u8[0] +#define unicode_props1() var1.u8[1] /* buffer var allocations, used by complex shapers */ #define complex_var_persistent_u8_0() var2.u8[0] diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index 0bcf7f58a..4a378a855 100644 --- a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -68,19 +68,12 @@ * matra for the Indic shaper. */ -static inline void -set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode) -{ - info->general_category() = hb_unicode_general_category (unicode, info->codepoint); - info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint); -} - static void output_glyph (hb_font_t *font, hb_buffer_t *buffer, hb_codepoint_t glyph) { buffer->output_glyph (glyph); - set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode); + _hb_glyph_info_set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode); } static bool @@ -163,8 +156,8 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, static int compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) { - unsigned int a = pa->combining_class(); - unsigned int b = pb->combining_class(); + unsigned int a = _hb_glyph_info_get_modified_combining_class (pa); + unsigned int b = _hb_glyph_info_get_modified_combining_class (pb); return a < b ? -1 : a == b ? 0 : +1; } @@ -214,12 +207,12 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, count = buffer->len; for (unsigned int i = 0; i < count; i++) { - if (buffer->info[i].combining_class() == 0) + if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0) continue; unsigned int end; for (end = i + 1; end < count; end++) - if (buffer->info[end].combining_class() == 0) + if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) break; /* We are going to do a bubble-sort. Only do this if the @@ -254,11 +247,11 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to * compose a CCC=0 character with it's preceding starter. */ (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL || - buffer->info[buffer->idx].combining_class() != 0) && + _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx]) != 0) && /* If there's anything between the starter and this char, they should have CCC * smaller than this character's. */ (starter == buffer->out_len - 1 || - buffer->out_info[buffer->out_len - 1].combining_class() < buffer->info[buffer->idx].combining_class()) && + _hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) < _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx])) && /* And compose. */ hb_unicode_compose (buffer->unicode, buffer->out_info[starter].codepoint, @@ -270,7 +263,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, /* Composes. Modify starter and carry on. */ buffer->out_info[starter].codepoint = composed; /* XXX update cluster */ - set_unicode_props (&buffer->out_info[starter], buffer->unicode); + _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode); buffer->skip_glyph (); continue; @@ -279,7 +272,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, /* Blocked, or doesn't compose. */ buffer->next_glyph (); - if (buffer->out_info[buffer->out_len - 1].combining_class() == 0) + if (_hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) == 0) starter = buffer->out_len - 1; } buffer->swap_buffers (); diff --git a/src/hb-ot-shape-private.hh b/src/hb-ot-shape-private.hh index 5fc69b11f..df0c7052e 100644 --- a/src/hb-ot-shape-private.hh +++ b/src/hb-ot-shape-private.hh @@ -53,4 +53,31 @@ _hb_ot_shape (hb_font_t *font, const hb_feature_t *features, unsigned int num_features); + +inline void +_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode) +{ + info->unicode_props0() = ((unsigned int) hb_unicode_general_category (unicode, info->codepoint)) | + (_hb_unicode_is_zero_width (info->codepoint) ? 0x80 : 0); + info->unicode_props1() = _hb_unicode_modified_combining_class (unicode, info->codepoint); +} + +inline hb_unicode_general_category_t +_hb_glyph_info_get_general_category (const hb_glyph_info_t *info) +{ + return (hb_unicode_general_category_t) (info->unicode_props0() & 0x7F); +} + +inline unsigned int +_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info) +{ + return info->unicode_props1(); +} + +inline hb_bool_t +_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info) +{ + return !!(info->unicode_props0() & 0x80); +} + #endif /* HB_OT_SHAPE_PRIVATE_HH */ diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index 167b1d7c9..dbfcf18fa 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -43,6 +43,7 @@ hb_tag_t common_features[] = { HB_TAG('r','l','i','g'), }; + hb_tag_t horizontal_features[] = { HB_TAG('c','a','l','t'), HB_TAG('c','l','i','g'), @@ -170,19 +171,12 @@ hb_ot_shape_setup_masks (hb_ot_shape_context_t *c) /* Prepare */ -static inline void -set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode) -{ - info->general_category() = hb_unicode_general_category (unicode, info->codepoint); - info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint); -} - static void hb_set_unicode_props (hb_buffer_t *buffer) { unsigned int count = buffer->len; for (unsigned int i = 0; i < count; i++) - set_unicode_props (&buffer->info[i], buffer->unicode); + _hb_glyph_info_set_unicode_props (&buffer->info[i], buffer->unicode); } static void @@ -190,7 +184,7 @@ hb_form_clusters (hb_buffer_t *buffer) { unsigned int count = buffer->len; for (unsigned int i = 1; i < count; i++) - if (FLAG (buffer->info[i].general_category()) & + if (FLAG (_hb_glyph_info_get_general_category (&buffer->info[i])) & (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) @@ -379,6 +373,23 @@ hb_position_complex_fallback_visual (hb_ot_shape_context_t *c) hb_truetype_kern (c); } +static void +hb_hide_zerowidth (hb_ot_shape_context_t *c) +{ + /* TODO Save the space character in the font? */ + hb_codepoint_t space; + if (!hb_font_get_glyph (c->font, ' ', 0, &space)) + return; /* No point! */ + + unsigned int count = c->buffer->len; + for (unsigned int i = 0; i < count; i++) + if (unlikely (_hb_glyph_info_is_zero_width (&c->buffer->info[i]))) { + c->buffer->info[i].codepoint = space; + c->buffer->pos[i].x_advance = 0; + c->buffer->pos[i].y_advance = 0; + } +} + /* Do it! */ @@ -390,10 +401,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c) /* Save the original direction, we use it later. */ c->target_direction = c->buffer->props.direction; - HB_BUFFER_ALLOCATE_VAR (c->buffer, general_category); - HB_BUFFER_ALLOCATE_VAR (c->buffer, combining_class); + HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props0); + HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props1); - hb_set_unicode_props (c->buffer); /* BUFFER: Set general_category and combining_class */ + hb_set_unicode_props (c->buffer); hb_form_clusters (c->buffer); @@ -427,8 +438,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c) hb_position_complex_fallback_visual (c); } - HB_BUFFER_DEALLOCATE_VAR (c->buffer, combining_class); - HB_BUFFER_DEALLOCATE_VAR (c->buffer, general_category); + hb_hide_zerowidth (c); + + HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props1); + HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props0); c->buffer->props.direction = c->target_direction; diff --git a/src/hb-unicode-private.hh b/src/hb-unicode-private.hh index ad85be7b9..c06dfe5fd 100644 --- a/src/hb-unicode-private.hh +++ b/src/hb-unicode-private.hh @@ -114,5 +114,43 @@ _hb_unicode_is_variation_selector (hb_codepoint_t unicode) (unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */ } +/* Zero-Width invisible characters: + * + * 00AD SOFT HYPHEN + * 034F COMBINING GRAPHEME JOINER + * + * 200B ZERO WIDTH SPACE + * 200C ZERO WIDTH NON-JOINER + * 200D ZERO WIDTH JOINER + * 200E LEFT-TO-RIGHT MARK + * 200F RIGHT-TO-LEFT MARK + * + * 2028 LINE SEPARATOR + * + * 202A LEFT-TO-RIGHT EMBEDDING + * 202B RIGHT-TO-LEFT EMBEDDING + * 202C POP DIRECTIONAL FORMATTING + * 202D LEFT-TO-RIGHT OVERRIDE + * 202E RIGHT-TO-LEFT OVERRIDE + * + * 2060 WORD JOINER + * 2061 FUNCTION APPLICATION + * 2062 INVISIBLE TIMES + * 2063 INVISIBLE SEPARATOR + * + * FEFF ZERO WIDTH NO-BREAK SPACE + */ +static inline hb_bool_t +_hb_unicode_is_zero_width (hb_codepoint_t ch) +{ + return ((ch & ~0x007F) == 0x2000 && ( + (ch >= 0x200B && ch <= 0x200F) || + (ch >= 0x202A && ch <= 0x202E) || + (ch >= 0x2060 && ch <= 0x2063) || + (ch == 0x2028) + )) || unlikely (ch == 0x00AD + || ch == 0x034F + || ch == 0xFEFF); +} #endif /* HB_UNICODE_PRIVATE_HH */