[Indic] Implement dotted-circle insertion for broken clusters
No panic, we really insert a dotted circle when a cluster is absolutely broken. Fixes most of the dotted-circle cases against Uniscribe (for Devanagari it fixes 80% of them, for Khmer 70%; the rest look like Uniscribe being really bogus...). I had to make a decision. Apparently Uniscribe adds one dotted circle to each broken character. I tried that, but that goes wrong easily with split matras. So I made it add only one dotted circle to an entire broken syllable tail. As in: "if there was a dotted circle here, this would have formed a correct cluster." That works better for split stuff, and I like it more.
This commit is contained in:
parent
327d14ef18
commit
b85800f9de
|
@ -152,6 +152,7 @@ struct hb_buffer_t {
|
||||||
HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index);
|
HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index);
|
||||||
/* Makes a copy of the glyph at idx to output and replace glyph_index */
|
/* Makes a copy of the glyph at idx to output and replace glyph_index */
|
||||||
HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index);
|
HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index);
|
||||||
|
HB_INTERNAL void output_info (hb_glyph_info_t &glyph_info);
|
||||||
/* Copies glyph at idx to output but doesn't advance idx */
|
/* Copies glyph at idx to output but doesn't advance idx */
|
||||||
HB_INTERNAL void copy_glyph (void);
|
HB_INTERNAL void copy_glyph (void);
|
||||||
/* Copies glyph at idx to output and advance idx.
|
/* Copies glyph at idx to output and advance idx.
|
||||||
|
|
|
@ -267,6 +267,16 @@ hb_buffer_t::output_glyph (hb_codepoint_t glyph_index)
|
||||||
out_len++;
|
out_len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
hb_buffer_t::output_info (hb_glyph_info_t &glyph_info)
|
||||||
|
{
|
||||||
|
if (unlikely (!make_room_for (0, 1))) return;
|
||||||
|
|
||||||
|
out_info[out_len] = glyph_info;
|
||||||
|
|
||||||
|
out_len++;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
hb_buffer_t::copy_glyph (void)
|
hb_buffer_t::copy_glyph (void)
|
||||||
{
|
{
|
||||||
|
|
|
@ -75,12 +75,14 @@ halant_or_matra_group = (final_halant_group | matra_group{0,4});
|
||||||
consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
|
consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail;
|
||||||
vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
|
vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail);
|
||||||
standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
|
standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail;
|
||||||
|
broken_cluster = n? (halant_group.cn){0,4} halant_or_matra_group syllable_tail;
|
||||||
other = any;
|
other = any;
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
consonant_syllable => { found_syllable (consonant_syllable); };
|
consonant_syllable => { found_syllable (consonant_syllable); };
|
||||||
vowel_syllable => { found_syllable (vowel_syllable); };
|
vowel_syllable => { found_syllable (vowel_syllable); };
|
||||||
standalone_cluster => { found_syllable (standalone_cluster); };
|
standalone_cluster => { found_syllable (standalone_cluster); };
|
||||||
|
broken_cluster => { found_syllable (broken_cluster); *had_broken_cluster = true; };
|
||||||
other => { found_syllable (non_indic_cluster); };
|
other => { found_syllable (non_indic_cluster); };
|
||||||
*|;
|
*|;
|
||||||
|
|
||||||
|
@ -98,7 +100,7 @@ main := |*
|
||||||
} HB_STMT_END
|
} HB_STMT_END
|
||||||
|
|
||||||
static void
|
static void
|
||||||
find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer)
|
find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, bool *had_broken_cluster)
|
||||||
{
|
{
|
||||||
unsigned int p, pe, eof, ts, te, act;
|
unsigned int p, pe, eof, ts, te, act;
|
||||||
int cs;
|
int cs;
|
||||||
|
|
|
@ -300,7 +300,7 @@ is_halant_or_coeng (const hb_glyph_info_t &info)
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
set_indic_properties (hb_glyph_info_t &info)
|
set_indic_properties (hb_glyph_info_t &info)
|
||||||
{
|
{
|
||||||
hb_codepoint_t u = info.codepoint;
|
hb_codepoint_t u = info.codepoint;
|
||||||
unsigned int type = get_indic_categories (u);
|
unsigned int type = get_indic_categories (u);
|
||||||
|
|
|
@ -769,6 +769,15 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
|
||||||
initial_reordering_consonant_syllable (plan, buffer, start, end);
|
initial_reordering_consonant_syllable (plan, buffer, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan,
|
||||||
|
hb_buffer_t *buffer,
|
||||||
|
unsigned int start, unsigned int end)
|
||||||
|
{
|
||||||
|
/* We already inserted dotted-circles, so just call the standalone_cluster. */
|
||||||
|
initial_reordering_standalone_cluster (plan, buffer, start, end);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
|
initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED,
|
||||||
hb_buffer_t *buffer HB_UNUSED,
|
hb_buffer_t *buffer HB_UNUSED,
|
||||||
|
@ -799,23 +808,63 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
case consonant_syllable: initial_reordering_consonant_syllable (plan, buffer, start, end); return;
|
case consonant_syllable: initial_reordering_consonant_syllable (plan, buffer, start, end); return;
|
||||||
case vowel_syllable: initial_reordering_vowel_syllable (plan, buffer, start, end); return;
|
case vowel_syllable: initial_reordering_vowel_syllable (plan, buffer, start, end); return;
|
||||||
case standalone_cluster: initial_reordering_standalone_cluster (plan, buffer, start, end); return;
|
case standalone_cluster: initial_reordering_standalone_cluster (plan, buffer, start, end); return;
|
||||||
case broken_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return;
|
case broken_cluster: initial_reordering_broken_cluster (plan, buffer, start, end); return;
|
||||||
case non_indic_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return;
|
case non_indic_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
insert_dotted_circles (const hb_ot_shape_plan_t *plan,
|
||||||
|
hb_font_t *font,
|
||||||
|
hb_buffer_t *buffer)
|
||||||
|
{
|
||||||
|
hb_codepoint_t dottedcircle_glyph;
|
||||||
|
if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph))
|
||||||
|
return;
|
||||||
|
|
||||||
|
hb_glyph_info_t dottedcircle;
|
||||||
|
dottedcircle.codepoint = 0x25CC;
|
||||||
|
set_indic_properties (dottedcircle);
|
||||||
|
dottedcircle.codepoint = dottedcircle_glyph;
|
||||||
|
|
||||||
|
buffer->clear_output ();
|
||||||
|
|
||||||
|
buffer->idx = 0;
|
||||||
|
unsigned int last_syllable = 0;
|
||||||
|
while (buffer->idx < buffer->len)
|
||||||
|
{
|
||||||
|
unsigned int syllable = buffer->cur().syllable();
|
||||||
|
syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F);
|
||||||
|
if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
|
||||||
|
{
|
||||||
|
hb_glyph_info_t info = dottedcircle;
|
||||||
|
info.cluster = buffer->cur().cluster;
|
||||||
|
info.mask = buffer->cur().mask;
|
||||||
|
info.syllable() = buffer->cur().syllable();
|
||||||
|
buffer->output_info (info);
|
||||||
|
last_syllable = syllable;
|
||||||
|
}
|
||||||
|
buffer->next_glyph ();
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer->swap_buffers ();
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
initial_reordering (const hb_ot_shape_plan_t *plan,
|
initial_reordering (const hb_ot_shape_plan_t *plan,
|
||||||
hb_font_t *font,
|
hb_font_t *font,
|
||||||
hb_buffer_t *buffer)
|
hb_buffer_t *buffer)
|
||||||
{
|
{
|
||||||
unsigned int count = buffer->len;
|
|
||||||
if (unlikely (!count)) return;
|
|
||||||
|
|
||||||
update_consonant_positions (plan, font, buffer);
|
update_consonant_positions (plan, font, buffer);
|
||||||
find_syllables (plan, buffer);
|
|
||||||
|
bool had_broken_clusters = false;
|
||||||
|
find_syllables (plan, buffer, &had_broken_clusters);
|
||||||
|
if (unlikely (had_broken_clusters))
|
||||||
|
insert_dotted_circles (plan, font, buffer);
|
||||||
|
|
||||||
hb_glyph_info_t *info = buffer->info;
|
hb_glyph_info_t *info = buffer->info;
|
||||||
|
unsigned int count = buffer->len;
|
||||||
|
if (unlikely (!count)) return;
|
||||||
unsigned int last = 0;
|
unsigned int last = 0;
|
||||||
unsigned int last_syllable = info[0].syllable();
|
unsigned int last_syllable = info[0].syllable();
|
||||||
for (unsigned int i = 1; i < count; i++)
|
for (unsigned int i = 1; i < count; i++)
|
||||||
|
@ -1170,6 +1219,12 @@ final_reordering (const hb_ot_shape_plan_t *plan,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Normalization mode requested by the Indic shaper.
 *
 * Always returns
 * HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
 * the shape plan is not consulted. */
static hb_ot_shape_normalization_mode_t
normalization_preference_indic (const hb_ot_shape_plan_t *plan HB_UNUSED)
{
  return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
}
|
||||||
|
|
||||||
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
|
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
|
||||||
{
|
{
|
||||||
"indic",
|
"indic",
|
||||||
|
@ -1178,7 +1233,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
|
||||||
data_create_indic,
|
data_create_indic,
|
||||||
data_destroy_indic,
|
data_destroy_indic,
|
||||||
NULL, /* preprocess_text */
|
NULL, /* preprocess_text */
|
||||||
NULL, /* normalization_preference */
|
normalization_preference_indic,
|
||||||
setup_masks_indic,
|
setup_masks_indic,
|
||||||
false, /* zero_width_attached_marks */
|
false, /* zero_width_attached_marks */
|
||||||
};
|
};
|
||||||
|
|
|
@ -38,6 +38,7 @@
|
||||||
enum hb_ot_shape_normalization_mode_t {
|
enum hb_ot_shape_normalization_mode_t {
|
||||||
HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
|
HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED,
|
||||||
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
|
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */
|
||||||
|
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */
|
||||||
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL, /* including base-to-base composition */
|
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL, /* including base-to-base composition */
|
||||||
|
|
||||||
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
|
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS
|
||||||
|
|
|
@ -414,10 +414,10 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool recompose, unsigned int end)
|
decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool short_circuit, unsigned int end)
|
||||||
{
|
{
|
||||||
if (likely (buffer->idx + 1 == end))
|
if (likely (buffer->idx + 1 == end))
|
||||||
return decompose_current_character (font, buffer, recompose);
|
return decompose_current_character (font, buffer, short_circuit);
|
||||||
else
|
else
|
||||||
return decompose_multi_char_cluster (font, buffer, end);
|
return decompose_multi_char_cluster (font, buffer, end);
|
||||||
}
|
}
|
||||||
|
@ -437,7 +437,8 @@ void
|
||||||
_hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
_hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
||||||
hb_ot_shape_normalization_mode_t mode)
|
hb_ot_shape_normalization_mode_t mode)
|
||||||
{
|
{
|
||||||
bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED;
|
bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED &&
|
||||||
|
mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT;
|
||||||
bool can_use_recompose = false;
|
bool can_use_recompose = false;
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
|
|
||||||
|
@ -459,7 +460,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
||||||
if (buffer->cur().cluster != buffer->info[end].cluster)
|
if (buffer->cur().cluster != buffer->info[end].cluster)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
can_use_recompose = decompose_cluster (font, buffer, recompose, end) || can_use_recompose;
|
can_use_recompose = decompose_cluster (font, buffer, short_circuit, end) || can_use_recompose;
|
||||||
}
|
}
|
||||||
buffer->swap_buffers ();
|
buffer->swap_buffers ();
|
||||||
|
|
||||||
|
@ -495,7 +496,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!recompose)
|
if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Third round, recompose */
|
/* Third round, recompose */
|
||||||
|
|
Loading…
Reference in New Issue