Replace zerowidth invisible chars with a zero-advance space glyph
Like Uniscribe does.
This commit is contained in:
parent
49e5da1591
commit
d1deaa2f5b
|
@ -25,6 +25,7 @@
|
|||
*/
|
||||
|
||||
#include "hb-ot-shape-complex-private.hh"
|
||||
#include "hb-ot-shape-private.hh"
|
||||
|
||||
|
||||
|
||||
|
@ -248,7 +249,7 @@ _hb_ot_shape_complex_setup_masks_arabic (hb_ot_map_t *map, hb_buffer_t *buffer,
|
|||
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
{
|
||||
unsigned int this_type = get_joining_type (buffer->info[i].codepoint, (hb_unicode_general_category_t) buffer->info[i].general_category());
|
||||
unsigned int this_type = get_joining_type (buffer->info[i].codepoint, _hb_glyph_info_get_general_category (&buffer->info[i]));
|
||||
|
||||
if (unlikely (this_type == JOINING_TYPE_T)) {
|
||||
buffer->info[i].arabic_shaping_action() = NONE;
|
||||
|
|
|
@ -432,24 +432,6 @@ found_non_indic (const hb_ot_map_t *map, hb_buffer_t *buffer, hb_mask_t *mask_ar
|
|||
|
||||
#include "hb-ot-shape-complex-indic-machine.hh"
|
||||
|
||||
static void
|
||||
remove_joiners (hb_buffer_t *buffer)
|
||||
{
|
||||
/* For now we remove joiners. However, Uniscbire seems to keep them
|
||||
* and output a zero-width space glyph for them. It is not clear to
|
||||
* me how that is supposed to interact with GSUB. */
|
||||
|
||||
buffer->clear_output ();
|
||||
unsigned int count = buffer->len;
|
||||
for (buffer->idx = 0; buffer->idx < count;)
|
||||
if (unlikely (is_joiner (buffer->info[buffer->idx])))
|
||||
buffer->skip_glyph ();
|
||||
else
|
||||
buffer->next_glyph ();
|
||||
|
||||
buffer->swap_buffers ();
|
||||
}
|
||||
|
||||
static void
|
||||
initial_reordering (const hb_ot_map_t *map,
|
||||
hb_face_t *face,
|
||||
|
@ -462,8 +444,6 @@ initial_reordering (const hb_ot_map_t *map,
|
|||
mask_array[i] = map->get_1_mask (indic_basic_features[i].tag);
|
||||
|
||||
find_syllables (map, buffer, mask_array);
|
||||
|
||||
remove_joiners (buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -35,8 +35,8 @@
|
|||
|
||||
|
||||
/* buffer var allocations, used during the entire shaping process */
|
||||
#define general_category() var1.u8[0] /* unicode general_category (hb_unicode_general_category_t) */
|
||||
#define combining_class() var1.u8[1] /* unicode combining_class (uint8_t) */
|
||||
#define unicode_props0() var1.u8[0]
|
||||
#define unicode_props1() var1.u8[1]
|
||||
|
||||
/* buffer var allocations, used by complex shapers */
|
||||
#define complex_var_persistent_u8_0() var2.u8[0]
|
||||
|
|
|
@ -68,19 +68,12 @@
|
|||
* matra for the Indic shaper.
|
||||
*/
|
||||
|
||||
static inline void
|
||||
set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
|
||||
{
|
||||
info->general_category() = hb_unicode_general_category (unicode, info->codepoint);
|
||||
info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
|
||||
}
|
||||
|
||||
/* Emit `glyph` through buffer->output_glyph() and then set the Unicode
 * properties of the entry at out_info[out_len - 1] using buffer->unicode.
 * NOTE(review): the two property-setting calls below look like the removed
 * and the added side of one changed line in this diff view, not two calls
 * that both execute -- confirm against the real source file. */
static void
|
||||
output_glyph (hb_font_t *font, hb_buffer_t *buffer,
|
||||
hb_codepoint_t glyph)
|
||||
{
|
||||
buffer->output_glyph (glyph);
|
||||
set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
|
||||
_hb_glyph_info_set_unicode_props (&buffer->out_info[buffer->out_len - 1], buffer->unicode);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -163,8 +156,8 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer,
|
|||
/* Three-way comparison of two glyph infos by combining class:
 * returns -1 when pa's class is smaller, 0 when equal, +1 when larger.
 * NOTE(review): `a` and `b` each appear twice below; this looks like the
 * removed (macro access) and added (accessor function) sides of the same
 * lines in a diff view -- confirm which pair is live in the real file. */
static int
|
||||
compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb)
|
||||
{
|
||||
unsigned int a = pa->combining_class();
|
||||
unsigned int b = pb->combining_class();
|
||||
unsigned int a = _hb_glyph_info_get_modified_combining_class (pa);
|
||||
unsigned int b = _hb_glyph_info_get_modified_combining_class (pb);
|
||||
|
||||
return a < b ? -1 : a == b ? 0 : +1;
|
||||
}
|
||||
|
@ -214,12 +207,12 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
|||
count = buffer->len;
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
{
|
||||
if (buffer->info[i].combining_class() == 0)
|
||||
if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0)
|
||||
continue;
|
||||
|
||||
unsigned int end;
|
||||
for (end = i + 1; end < count; end++)
|
||||
if (buffer->info[end].combining_class() == 0)
|
||||
if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
|
||||
break;
|
||||
|
||||
/* We are going to do a bubble-sort. Only do this if the
|
||||
|
@ -254,11 +247,11 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
|||
if (/* If mode is NOT COMPOSED_FULL (ie. it's COMPOSED_DIACRITICS), we don't try to
|
||||
* compose a CCC=0 character with its preceding starter. */
|
||||
(mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL ||
|
||||
buffer->info[buffer->idx].combining_class() != 0) &&
|
||||
_hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx]) != 0) &&
|
||||
/* If there's anything between the starter and this char, they should have CCC
|
||||
* smaller than this character's. */
|
||||
(starter == buffer->out_len - 1 ||
|
||||
buffer->out_info[buffer->out_len - 1].combining_class() < buffer->info[buffer->idx].combining_class()) &&
|
||||
_hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) < _hb_glyph_info_get_modified_combining_class (&buffer->info[buffer->idx])) &&
|
||||
/* And compose. */
|
||||
hb_unicode_compose (buffer->unicode,
|
||||
buffer->out_info[starter].codepoint,
|
||||
|
@ -270,7 +263,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
|||
/* Composes. Modify starter and carry on. */
|
||||
buffer->out_info[starter].codepoint = composed;
|
||||
/* XXX update cluster */
|
||||
set_unicode_props (&buffer->out_info[starter], buffer->unicode);
|
||||
_hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer->unicode);
|
||||
|
||||
buffer->skip_glyph ();
|
||||
continue;
|
||||
|
@ -279,7 +272,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
|||
/* Blocked, or doesn't compose. */
|
||||
buffer->next_glyph ();
|
||||
|
||||
if (buffer->out_info[buffer->out_len - 1].combining_class() == 0)
|
||||
if (_hb_glyph_info_get_modified_combining_class (&buffer->out_info[buffer->out_len - 1]) == 0)
|
||||
starter = buffer->out_len - 1;
|
||||
}
|
||||
buffer->swap_buffers ();
|
||||
|
|
|
@ -53,4 +53,31 @@ _hb_ot_shape (hb_font_t *font,
|
|||
const hb_feature_t *features,
|
||||
unsigned int num_features);
|
||||
|
||||
|
||||
inline void
|
||||
_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
|
||||
{
|
||||
info->unicode_props0() = ((unsigned int) hb_unicode_general_category (unicode, info->codepoint)) |
|
||||
(_hb_unicode_is_zero_width (info->codepoint) ? 0x80 : 0);
|
||||
info->unicode_props1() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
|
||||
}
|
||||
|
||||
/* Return the general category kept in the low seven bits of
 * unicode_props0; the 0x80 bit is masked off before the cast. */
inline hb_unicode_general_category_t
_hb_glyph_info_get_general_category (const hb_glyph_info_t *info)
{
  const unsigned int category_bits = info->unicode_props0() & 0x7F;
  return (hb_unicode_general_category_t) category_bits;
}
|
||||
|
||||
inline unsigned int
|
||||
_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info)
|
||||
{
|
||||
return info->unicode_props1();
|
||||
}
|
||||
|
||||
/* Return non-zero (1) when bit 0x80 of unicode_props0 is set, else 0. */
inline hb_bool_t
_hb_glyph_info_is_zero_width (const hb_glyph_info_t *info)
{
  return (info->unicode_props0() & 0x80) != 0;
}
|
||||
|
||||
#endif /* HB_OT_SHAPE_PRIVATE_HH */
|
||||
|
|
|
@ -43,6 +43,7 @@ hb_tag_t common_features[] = {
|
|||
HB_TAG('r','l','i','g'),
|
||||
};
|
||||
|
||||
|
||||
hb_tag_t horizontal_features[] = {
|
||||
HB_TAG('c','a','l','t'),
|
||||
HB_TAG('c','l','i','g'),
|
||||
|
@ -170,19 +171,12 @@ hb_ot_shape_setup_masks (hb_ot_shape_context_t *c)
|
|||
|
||||
/* Prepare */
|
||||
|
||||
static inline void
|
||||
set_unicode_props (hb_glyph_info_t *info, hb_unicode_funcs_t *unicode)
|
||||
{
|
||||
info->general_category() = hb_unicode_general_category (unicode, info->codepoint);
|
||||
info->combining_class() = _hb_unicode_modified_combining_class (unicode, info->codepoint);
|
||||
}
|
||||
|
||||
/* Set the Unicode properties of every entry in buffer->info, using
 * buffer->unicode for the lookups.
 * NOTE(review): the two calls in the loop body look like the removed and
 * the added side of one changed line in this diff view -- confirm against
 * the real source file. */
static void
|
||||
hb_set_unicode_props (hb_buffer_t *buffer)
|
||||
{
|
||||
unsigned int count = buffer->len;
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
set_unicode_props (&buffer->info[i], buffer->unicode);
|
||||
_hb_glyph_info_set_unicode_props (&buffer->info[i], buffer->unicode);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -190,7 +184,7 @@ hb_form_clusters (hb_buffer_t *buffer)
|
|||
{
|
||||
unsigned int count = buffer->len;
|
||||
for (unsigned int i = 1; i < count; i++)
|
||||
if (FLAG (buffer->info[i].general_category()) &
|
||||
if (FLAG (_hb_glyph_info_get_general_category (&buffer->info[i])) &
|
||||
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) |
|
||||
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
|
||||
|
@ -379,6 +373,23 @@ hb_position_complex_fallback_visual (hb_ot_shape_context_t *c)
|
|||
hb_truetype_kern (c);
|
||||
}
|
||||
|
||||
static void
|
||||
hb_hide_zerowidth (hb_ot_shape_context_t *c)
|
||||
{
|
||||
/* TODO Save the space character in the font? */
|
||||
hb_codepoint_t space;
|
||||
if (!hb_font_get_glyph (c->font, ' ', 0, &space))
|
||||
return; /* No point! */
|
||||
|
||||
unsigned int count = c->buffer->len;
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
if (unlikely (_hb_glyph_info_is_zero_width (&c->buffer->info[i]))) {
|
||||
c->buffer->info[i].codepoint = space;
|
||||
c->buffer->pos[i].x_advance = 0;
|
||||
c->buffer->pos[i].y_advance = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Do it! */
|
||||
|
||||
|
@ -390,10 +401,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c)
|
|||
/* Save the original direction, we use it later. */
|
||||
c->target_direction = c->buffer->props.direction;
|
||||
|
||||
HB_BUFFER_ALLOCATE_VAR (c->buffer, general_category);
|
||||
HB_BUFFER_ALLOCATE_VAR (c->buffer, combining_class);
|
||||
HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props0);
|
||||
HB_BUFFER_ALLOCATE_VAR (c->buffer, unicode_props1);
|
||||
|
||||
hb_set_unicode_props (c->buffer); /* BUFFER: Set general_category and combining_class */
|
||||
hb_set_unicode_props (c->buffer);
|
||||
|
||||
hb_form_clusters (c->buffer);
|
||||
|
||||
|
@ -427,8 +438,10 @@ hb_ot_shape_execute_internal (hb_ot_shape_context_t *c)
|
|||
hb_position_complex_fallback_visual (c);
|
||||
}
|
||||
|
||||
HB_BUFFER_DEALLOCATE_VAR (c->buffer, combining_class);
|
||||
HB_BUFFER_DEALLOCATE_VAR (c->buffer, general_category);
|
||||
hb_hide_zerowidth (c);
|
||||
|
||||
HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props1);
|
||||
HB_BUFFER_DEALLOCATE_VAR (c->buffer, unicode_props0);
|
||||
|
||||
c->buffer->props.direction = c->target_direction;
|
||||
|
||||
|
|
|
@ -114,5 +114,43 @@ _hb_unicode_is_variation_selector (hb_codepoint_t unicode)
|
|||
(unicode >= 0xE0100 && unicode <= 0xE01EF)); /* VARIATION SELECTOR-17..256 */
|
||||
}
|
||||
|
||||
/* Zero-Width invisible characters:
|
||||
*
|
||||
* 00AD SOFT HYPHEN
|
||||
* 034F COMBINING GRAPHEME JOINER
|
||||
*
|
||||
* 200B ZERO WIDTH SPACE
|
||||
* 200C ZERO WIDTH NON-JOINER
|
||||
* 200D ZERO WIDTH JOINER
|
||||
* 200E LEFT-TO-RIGHT MARK
|
||||
* 200F RIGHT-TO-LEFT MARK
|
||||
*
|
||||
* 2028 LINE SEPARATOR
|
||||
*
|
||||
* 202A LEFT-TO-RIGHT EMBEDDING
|
||||
* 202B RIGHT-TO-LEFT EMBEDDING
|
||||
* 202C POP DIRECTIONAL FORMATTING
|
||||
* 202D LEFT-TO-RIGHT OVERRIDE
|
||||
* 202E RIGHT-TO-LEFT OVERRIDE
|
||||
*
|
||||
* 2060 WORD JOINER
|
||||
* 2061 FUNCTION APPLICATION
|
||||
* 2062 INVISIBLE TIMES
|
||||
* 2063 INVISIBLE SEPARATOR
|
||||
*
|
||||
* FEFF ZERO WIDTH NO-BREAK SPACE
|
||||
*/
|
||||
/* Return non-zero when `ch` is one of the zero-width invisible characters:
 * U+200B..200F, U+2028, U+202A..202E, U+2060..2063 (all inside the
 * U+2000..207F block, which is tested first with a single mask), or one of
 * the isolated codepoints U+00AD, U+034F and U+FEFF. */
static inline hb_bool_t
_hb_unicode_is_zero_width (hb_codepoint_t ch)
{
  if ((ch & ~0x007F) == 0x2000)
  {
    if (ch >= 0x200B && ch <= 0x200F)
      return 1;
    if (ch >= 0x202A && ch <= 0x202E)
      return 1;
    if (ch >= 0x2060 && ch <= 0x2063)
      return 1;
    if (ch == 0x2028)
      return 1;
  }

  /* The isolated codepoints lie outside U+2000..207F, so for anything in
   * that block which reached this point the test below is simply false. */
  return unlikely (ch == 0x00AD
		|| ch == 0x034F
		|| ch == 0xFEFF);
}
|
||||
|
||||
#endif /* HB_UNICODE_PRIVATE_HH */
|
||||
|
|
Loading…
Reference in New Issue