Replace zerowidth invisible chars with a zero-advance space glyph

Like Uniscribe does.
This commit is contained in:
Behdad Esfahbod 2012-05-09 15:04:13 +02:00
parent 49e5da1591
commit d1deaa2f5b
7 changed files with 105 additions and 53 deletions

View File

@ -25,6 +25,7 @@
*/ */
#include "hb-ot-shape-complex-private.hh" #include "hb-ot-shape-complex-private.hh"
#include "hb-ot-shape-private.hh"
@ -248,7 +249,7 @@ _hb_ot_shape_complex_setup_masks_arabic (hb_ot_map_t *map, hb_buffer_t *buffer,
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ {
unsigned int this_type = get_joining_type (buffer->info[i].codepoint, (hb_unicode_general_category_t) buffer->info[i].general_category()); unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i]));
if (unlikely (this_type == JOINING_TYPE_T)) { if (unlikely (this_type == JOINING_TYPE_T)) {
buffer->info[i].arabic_shaping_action() = NONE; buffer->info[i].arabic_shaping_action() = NONE;

View File

@ -432,24 +432,6 @@ found_non_indic (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_ar
#include "hb-ot-shape-complex-indic-machine.hh" #include "hb-ot-shape-complex-indic-machine.hh"
/* Rebuild the buffer without joiner characters.
 *
 * For now joiners are simply removed.  Uniscribe, however, seems to keep
 * them and emit a zero-width space glyph in their place; it is not clear
 * how that is supposed to interact with GSUB. */
static void
remove_joiners (hb_buffer_t *buffer)
{
  buffer->clear_output ();

  /* Snapshot the input length once, before walking the buffer. */
  const unsigned int len = buffer->len;

  buffer->idx = 0;
  while (buffer->idx < len)
  {
    if (unlikely (is_joiner (buffer->info[buffer->idx])))
    {
      /* Joiner: skip it instead of passing it on. */
      buffer->skip_glyph ();
      continue;
    }

    buffer->next_glyph ();
  }

  buffer->swap_buffers ();
}
static void static void
initial_reordering (const hb_ot_map_t *map, initial_reordering (const hb_ot_map_t *map,
hb_face_t *face, hb_face_t *face,
@ -462,8 +444,6 @@ initial_reordering (const hb_ot_map_t *map,
mask_array[i] = map->get_1_mask (indic_basic_features[i].tag); mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);
find_syllables (map, buffer, mask_array); find_syllables (map, buffer, mask_array);
remove_joiners (buffer);
} }
static void static void

View File

@ -35,8 +35,8 @@
/* buffer var allocations, used during the entire shaping process */ /* buffer var allocations, used during the entire shaping process */
#define general_category() var1.u8[0] /* unicode general_category (hb_unicode_general_category_t) */ #define unicode_props0() var1.u8[0]
#define combining_class() var1.u8[1] /* unicode combining_class (uint8_t) */ #define unicode_props1() var1.u8[1]
/* buffer var allocations, used by complex shapers */ /* buffer var allocations, used by complex shapers */
#define complex_var_persistent_u8_0() var2.u8[0] #define complex_var_persistent_u8_0() var2.u8[0]

View File

@ -68,19 +68,12 @@
* matra for the Indic shaper. * matra for the Indic shaper.
*/ */
/* Look up the Unicode properties of info->codepoint through `unicode`
 * and cache them in the glyph info: the general category goes into
 * general_category(), the modified combining class into
 * combining_class(). */
static inline void
set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
  unsigned int gen_cat = hb_unicode_general_category (unicode, info->codepoint);
  info->general_category() = gen_cat;

  unsigned int mod_ccc = _hb_unicode_modified_combining_class (unicode, info->codepoint);
  info->combining_class() = mod_ccc;
}
static void static void
output_glyph (hb_font_t *font, hb_buffer_t *buffer, output_glyph (hb_font_t *font, hb_buffer_t *buffer,
hb_codepoint_t glyph) hb_codepoint_t glyph)
{ {
buffer->output_glyph (glyph); buffer->output_glyph (glyph);
set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode); _hb_glyph_info_set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
} }
static bool static bool
@ -163,8 +156,8 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer,
static int static int
compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
{ {
unsigned int a = pa->combining_class(); unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
unsigned int b = pb->combining_class(); unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
return a < b ? -1 : a == b ? 0 : +1; return a < b ? -1 : a == b ? 0 : +1;
} }
@ -214,12 +207,12 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
count = buffer->len; count = buffer->len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ {
if (buffer->info[i].combining_class() == 0) if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
continue; continue;
unsigned int end; unsigned int end;
for (end = i + 1; end < count; end++) for (end = i + 1; end < count; end++)
if (buffer->info[end].combining_class() == 0) if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
break; break;
/* We are going to do a bubble-sort. Only do this if the /* We are going to do a bubble-sort. Only do this if the
@ -254,11 +247,11 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to
* compose a CCC=0 character with its preceding starter. */ * compose a CCC=0 character with its preceding starter. */
(mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL || (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL ||
buffer->info[buffer->idx].combining_class() != 0) && _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx]) != 0) &&
/* If there's anything between the starter and this char, they should have CCC /* If there's anything between the starter and this char, they should have CCC
* smaller than this character's. */ * smaller than this character's. */
(starter == buffer->out_len - 1 || (starter == buffer->out_len - 1 ||
buffer->out_info[buffer->out_len - 1].combining_class() < buffer->info[buffer->idx].combining_class()) && _hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) < _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx])) &&
/* And compose. */ /* And compose. */
hb_unicode_compose (buffer->unicode, hb_unicode_compose (buffer->unicode,
buffer->out_info[starter].codepoint, buffer->out_info[starter].codepoint,
@ -270,7 +263,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
/* Composes. Modify starter and carry on. */ /* Composes. Modify starter and carry on. */
buffer->out_info[starter].codepoint = composed; buffer->out_info[starter].codepoint = composed;
/* XXX update cluster */ /* XXX update cluster */
set_unicode_props (&buffer->out_info[starter], buffer->unicode); _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
buffer->skip_glyph (); buffer->skip_glyph ();
continue; continue;
@ -279,7 +272,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
/* Blocked, or doesn't compose. */ /* Blocked, or doesn't compose. */
buffer->next_glyph (); buffer->next_glyph ();
if (buffer->out_info[buffer->out_len - 1].combining_class() == 0) if (_hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) == 0)
starter = buffer->out_len - 1; starter = buffer->out_len - 1;
} }
buffer->swap_buffers (); buffer->swap_buffers ();

View File

@ -53,4 +53,31 @@ _hb_ot_shape (hb_font_t *font,
const hb_feature_t *features, const hb_feature_t *features,
unsigned int num_features); unsigned int num_features);
/* Compute and cache the Unicode properties of info->codepoint.
 *
 * unicode_props0() receives the general category, with bit 0x80 set in
 * addition when _hb_unicode_is_zero_width() reports the codepoint as a
 * zero-width character.  unicode_props1() receives the modified
 * combining class. */
inline void
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
  unsigned int props = (unsigned int) hb_unicode_general_category (unicode, info->codepoint);
  if (_hb_unicode_is_zero_width (info->codepoint))
    props |= 0x80; /* zero-width marker bit */
  info->unicode_props0() = props;

  info->unicode_props1() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
}
/* Return the general category cached in unicode_props0().  Only the low
 * seven bits are used; bit 0x80 is masked off. */
inline hb_unicode_general_category_t
_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
{
  const unsigned int props = info->unicode_props0();
  return (hb_unicode_general_category_t) (props & 0x7F);
}
/* Return the value cached in unicode_props1(), i.e. the glyph's
 * modified combining class. */
inline unsigned int
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
{
  const unsigned int mod_ccc = info->unicode_props1();
  return mod_ccc;
}
/* Return non-zero iff bit 0x80 of unicode_props0() is set, i.e. the
 * glyph was flagged as a zero-width character. */
inline hb_bool_t
_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info)
{
  return (info->unicode_props0() & 0x80) != 0;
}
#endif /* HB_OT_SHAPE_PRIVATE_HH */ #endif /* HB_OT_SHAPE_PRIVATE_HH */

View File

@ -43,6 +43,7 @@ hb_tag_t common_features[] = {
HB_TAG('r','l','i','g'), HB_TAG('r','l','i','g'),
}; };
hb_tag_t horizontal_features[] = { hb_tag_t horizontal_features[] = {
HB_TAG('c','a','l','t'), HB_TAG('c','a','l','t'),
HB_TAG('c','l','i','g'), HB_TAG('c','l','i','g'),
@ -170,19 +171,12 @@ hb_ot_shape_setup_masks (hb_ot_shape_context_t *c)
/* Prepare */ /* Prepare */
/* Cache the Unicode properties of info->codepoint in the glyph info:
 * general_category() gets the general category and combining_class()
 * gets the modified combining class, both looked up via `unicode`. */
static inline void
set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
{
  unsigned int gen_cat = hb_unicode_general_category (unicode, info->codepoint);
  info->general_category() = gen_cat;

  unsigned int mod_ccc = _hb_unicode_modified_combining_class (unicode, info->codepoint);
  info->combining_class() = mod_ccc;
}
static void static void
hb_set_unicode_props (hb_buffer_t *buffer) hb_set_unicode_props (hb_buffer_t *buffer)
{ {
unsigned int count = buffer->len; unsigned int count = buffer->len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
set_unicode_props (&buffer->info[i], buffer->unicode); _hb_glyph_info_set_unicode_props (&buffer->info[i], buffer->unicode);
} }
static void static void
@ -190,7 +184,7 @@ hb_form_clusters (hb_buffer_t *buffer)
{ {
unsigned int count = buffer->len; unsigned int count = buffer->len;
for (unsigned int i = 1; i < count; i++) for (unsigned int i = 1; i < count; i++)
if (FLAG (buffer->info[i].general_category()) & if (FLAG (_hb_glyph_info_get_general_category (&buffer->info[i])) &
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
@ -379,6 +373,23 @@ hb_position_complex_fallback_visual (hb_ot_shape_context_t *c)
hb_truetype_kern (c); hb_truetype_kern (c);
} }
/* Replace every buffer entry flagged as zero-width with the font's
 * glyph for U+0020 SPACE and zero both of its advances.  If the font
 * has no glyph for space, the buffer is left untouched. */
static void
hb_hide_zerowidth (hb_ot_shape_context_t *c)
{
  /* TODO Save the space character in the font? */
  hb_codepoint_t space_glyph;
  if (!hb_font_get_glyph (c->font, ' ', 0, &space_glyph))
  {
    /* No point! */
    return;
  }

  const unsigned int len = c->buffer->len;
  unsigned int i = 0;
  while (i < len)
  {
    if (unlikely (_hb_glyph_info_is_zero_width (&c->buffer->info[i])))
    {
      c->buffer->info[i].codepoint = space_glyph;
      c->buffer->pos[i].x_advance = 0;
      c->buffer->pos[i].y_advance = 0;
    }
    i++;
  }
}
/* Do it! */ /* Do it! */
@ -390,10 +401,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c)
/* Save the original direction, we use it later. */ /* Save the original direction, we use it later. */
c->target_direction = c->buffer->props.direction; c->target_direction = c->buffer->props.direction;
HB_BUFFER_ALLOCATE_VAR (c->buffer, general_category); HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props0);
HB_BUFFER_ALLOCATE_VAR (c->buffer, combining_class); HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props1);
hb_set_unicode_props (c->buffer); /* BUFFER: Set general_category and combining_class */ hb_set_unicode_props (c->buffer);
hb_form_clusters (c->buffer); hb_form_clusters (c->buffer);
@ -427,8 +438,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c)
hb_position_complex_fallback_visual (c); hb_position_complex_fallback_visual (c);
} }
HB_BUFFER_DEALLOCATE_VAR (c->buffer, combining_class); hb_hide_zerowidth (c);
HB_BUFFER_DEALLOCATE_VAR (c->buffer, general_category);
HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props1);
HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props0);
c->buffer->props.direction = c->target_direction; c->buffer->props.direction = c->target_direction;

View File

@ -114,5 +114,43 @@ _hb_unicode_is_variation_selector (hb_codepoint_t unicode)
(unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */ (unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */
} }
/* Zero-Width invisible characters:
*
* 00AD SOFT HYPHEN
* 034F COMBINING GRAPHEME JOINER
*
* 200B ZERO WIDTH SPACE
* 200C ZERO WIDTH NON-JOINER
* 200D ZERO WIDTH JOINER
* 200E LEFT-TO-RIGHT MARK
* 200F RIGHT-TO-LEFT MARK
*
* 2028 LINE SEPARATOR
*
* 202A LEFT-TO-RIGHT EMBEDDING
* 202B RIGHT-TO-LEFT EMBEDDING
* 202C POP DIRECTIONAL FORMATTING
* 202D LEFT-TO-RIGHT OVERRIDE
* 202E RIGHT-TO-LEFT OVERRIDE
*
* 2060 WORD JOINER
* 2061 FUNCTION APPLICATION
* 2062 INVISIBLE TIMES
* 2063 INVISIBLE SEPARATOR
*
* FEFF ZERO WIDTH NO-BREAK SPACE
*/
/* Return non-zero iff `ch` is one of the zero-width invisible
 * characters: U+200B..U+200F, U+2028, U+202A..U+202E, U+2060..U+2063,
 * U+00AD, U+034F or U+FEFF. */
static inline hb_bool_t
_hb_unicode_is_zero_width (hb_codepoint_t ch)
{
  /* Most of the set lives in the 128-codepoint page U+2000..U+207F, so
   * test for that page first. */
  if ((ch & ~0x007F) == 0x2000)
    return (ch >= 0x200B && ch <= 0x200F) ||
	   (ch == 0x2028) ||
	   (ch >= 0x202A && ch <= 0x202E) ||
	   (ch >= 0x2060 && ch <= 0x2063);

  /* The remaining members are isolated codepoints. */
  return unlikely (ch == 0x00AD ||
		   ch == 0x034F ||
		   ch == 0xFEFF);
}
#endif /* HB_UNICODE_PRIVATE_HH */ #endif /* HB_UNICODE_PRIVATE_HH */