Implement SYRIAC ABBREVIATION MARK with 'stch' feature

The feature is enabled for any character in the Arabic shaper.
We should experiment with using it for the Arabic subtending marks
as well, though those pose a directionality problem of their own,
since they are used with digits...

Fixes https://github.com/behdad/harfbuzz/issues/141
This commit is contained in:
Behdad Esfahbod 2015-11-05 17:29:03 -08:00
parent c743ec5886
commit 6e6f82b6f3
4 changed files with 244 additions and 6 deletions

View File

@ -28,9 +28,16 @@
#include "hb-ot-shape-private.hh"
#ifndef HB_DEBUG_ARABIC
#define HB_DEBUG_ARABIC (HB_DEBUG+0)
#endif
/* buffer var allocations */
#define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */
#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0
/*
* Joining types:
@ -84,7 +91,7 @@ static const hb_tag_t arabic_features[] =
/* Same order as the feature array */
enum {
enum arabic_action_t {
ISOL,
FINA,
FIN2,
@ -95,7 +102,11 @@ enum {
NONE,
ARABIC_NUM_FEATURES = NONE
ARABIC_NUM_FEATURES = NONE,
/* We abuse the same byte for other things... */
STCH_FIXED,
STCH_REPEATING,
};
static const struct arabic_state_table_entry {
@ -139,6 +150,11 @@ arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer);
static void
record_stch (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer);
static void
collect_features_arabic (hb_ot_shape_planner_t *plan)
{
@ -165,6 +181,9 @@ collect_features_arabic (hb_ot_shape_planner_t *plan)
map->add_gsub_pause (nuke_joiners);
map->add_global_bool_feature (HB_TAG('s','t','c','h'));
map->add_gsub_pause (record_stch);
map->add_global_bool_feature (HB_TAG('c','c','m','p'));
map->add_global_bool_feature (HB_TAG('l','o','c','l'));
@ -208,8 +227,10 @@ struct arabic_shape_plan_t
* mask_array[NONE] == 0. */
hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1];
bool do_fallback;
arabic_fallback_plan_t *fallback_plan;
unsigned int do_fallback : 1;
unsigned int has_stch : 1;
};
void *
@ -220,6 +241,7 @@ data_create_arabic (const hb_ot_shape_plan_t *plan)
return NULL;
arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h'));
for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) {
arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
arabic_plan->do_fallback = arabic_plan->do_fallback &&
@ -320,8 +342,6 @@ setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan,
hb_glyph_info_t *info = buffer->info;
for (unsigned int i = 0; i < count; i++)
info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()];
HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
}
static void
@ -371,6 +391,193 @@ retry:
arabic_fallback_plan_shape (fallback_plan, font, buffer);
}
/*
* Stretch feature: "stch".
* See example here:
* https://www.microsoft.com/typography/OpenTypeDev/syriac/intro.htm
* We implement this in a generic way, such that the Arabic subtending
* marks can use it as well.
*/
static void
record_stch (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
hb_buffer_t *buffer)
{
const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
if (!arabic_plan->has_stch)
return;
/* 'stch' feature was just applied. Look for anything that multiplied,
* and record it for stch treatment later. Note that rtlm, frac, etc
* are applied before stch, but we assume that they didn't result in
* anything multiplying into 5 pieces, so it's safe-ish... */
unsigned int count = buffer->len;
hb_glyph_info_t *info = buffer->info;
for (unsigned int i = 0; i < count; i++)
if (unlikely (_hb_glyph_info_multiplied (&info[i])))
{
unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]);
info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED;
buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH;
}
}
/* Expands the stretch tiles recorded by record_stch() so that they span
 * the width of the glyphs preceding them in the buffer.
 *
 * Returns immediately unless HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH is set
 * on the buffer.  Otherwise the buffer is rewritten in place: repeating
 * tiles are duplicated as needed, every tile gets an x_offset, and
 * buffer->len grows by the number of duplicates.  If enlarging the buffer
 * fails, the function bails out before modifying any glyph.
 *
 * `plan` is not used by this function. */
static void
apply_stch (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH)))
return;
/* The Arabic shaper currently always processes in RTL mode, so we should
* stretch / position the stretched pieces to the left / preceding glyphs. */
/* We do a two pass implementation:
* First pass calculates the exact number of extra glyphs we need,
* We then enlarge buffer to have that much room,
* Second pass applies the stretch, copying things to the end of buffer.
*/
/* 30 = 2048 / 70.
* https://www.microsoft.com/typography/cursivescriptguidelines.mspx */
hb_position_t overlap = font->x_scale / 30;
DEBUG_MSG (ARABIC, NULL, "overlap for stretching is %d", overlap);
/* Widths carry the sign of x_scale; multiplying by `sign` lets the
 * comparisons and the division below work on non-negative magnitudes. */
int sign = font->x_scale < 0 ? -1 : +1;
unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT
for (enum step_t { MEASURE, CUT } step = MEASURE; step <= CUT; step = (step_t) (step + 1))
{
/* info / pos are re-read on every pass -- presumably because
 * buffer->ensure() at the end of MEASURE may move the arrays. */
unsigned int count = buffer->len;
hb_glyph_info_t *info = buffer->info;
hb_glyph_position_t *pos = buffer->pos;
unsigned int new_len = count + extra_glyphs_needed; // write head during CUT
/* j is the write index; it starts one past the last output slot and is
 * decremented before each store, so output is filled back to front. */
unsigned int j = new_len;
/* Walk the input from the last glyph to the first; i is one past the
 * index of the glyph being examined. */
for (unsigned int i = count; i; i--)
{
if (!hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
{
/* Ordinary glyph: during CUT just move it to its new slot. */
if (step == CUT)
{
--j;
info[j] = info[i - 1];
pos[j] = pos[i - 1];
}
continue;
}
/* Yay, justification! */
hb_position_t w_total = 0; // Total to be filled
hb_position_t w_fixed = 0; // Sum of fixed tiles
hb_position_t w_repeating = 0; // Sum of repeating tiles
int n_fixed = 0;
int n_repeating = 0;
/* Collect the maximal run of stretch tiles ending just before `end`;
 * after the loop the run occupies indices [start, end). */
unsigned int end = i;
while (i &&
hb_in_range<unsigned> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
{
i--;
hb_glyph_extents_t extents;
/* A glyph without extents contributes zero width (before the overlap
 * adjustment below). */
if (!font->get_glyph_extents (info[i].codepoint, &extents))
extents.width = 0;
/* Each tile is counted `overlap` narrower than its ink width. */
extents.width -= overlap;
if (info[i].arabic_shaping_action() == STCH_FIXED)
{
w_fixed += extents.width;
n_fixed++;
}
else
{
w_repeating += extents.width;
n_repeating++;
}
}
unsigned int start = i;
/* Extend backwards over the glyphs the tiles must cover: non-tile glyphs
 * that are default-ignorable or whose general category passes
 * HB_UNICODE_GENERAL_CATEGORY_IS_WORD.  Their advances add up to the
 * width to be filled; they occupy indices [context, start). */
unsigned int context = i;
while (context &&
!hb_in_range<unsigned> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) &&
(_hb_glyph_info_is_default_ignorable (&info[context - 1]) ||
HB_UNICODE_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1]))))
{
context--;
w_total += pos[context].x_advance;
}
/* Compensates for the enclosing for-loop's i--, so the next iteration
 * examines the glyph at index start - 1. */
i++; // Don't touch i again.
DEBUG_MSG (ARABIC, NULL, "%s stretch at (%d,%d,%d)",
step == MEASURE ? "measuring" : "cutting", context, start, end);
DEBUG_MSG (ARABIC, NULL, "rest of word: count=%d width %d", start - context, w_total);
DEBUG_MSG (ARABIC, NULL, "fixed tiles: count=%d width=%d", n_fixed, w_fixed);
DEBUG_MSG (ARABIC, NULL, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
/* Number of additional times to repeat each repeating tile. */
int n_copies = 0;
hb_position_t w_remaining = w_total - w_fixed - overlap;
/* Only stretch when the repeating tiles have positive width and the gap
 * exceeds one set of them.  The quotient is rounded to nearest, and the
 * copy already present in the buffer is subtracted. */
if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0)
n_copies = (sign * w_remaining + sign * w_repeating / 2) / (sign * w_repeating) - 1;
if (step == MEASURE)
{
extra_glyphs_needed += n_copies * n_repeating;
DEBUG_MSG (ARABIC, NULL, "will add extra %d copies of repeating tiles", n_copies);
}
else
{
/* Emit the tiles from the end of the run to its start.  x_offset is a
 * running value that moves by one tile width per emitted copy. */
hb_position_t x_offset = -overlap;
for (unsigned int k = end; k > start; k--)
{
hb_glyph_extents_t extents;
if (!font->get_glyph_extents (info[k - 1].codepoint, &extents))
extents.width = 0;
extents.width -= overlap;
unsigned int repeat = 1;
if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
repeat += n_copies;
DEBUG_MSG (ARABIC, NULL, "appending %d copies of glyph %d; j=%d",
repeat, info[k - 1].codepoint, j);
for (unsigned int n = 0; n < repeat; n++)
{
x_offset -= extents.width;
/* The source slot is updated first so that the copy below carries the
 * new offset. */
pos[k - 1].x_offset = x_offset;
/* Append copy. */
--j;
info[j] = info[k - 1];
pos[j] = pos[k - 1];
}
}
}
}
if (step == MEASURE)
{
/* Make room for the duplicates; on failure leave the loop before the
 * CUT pass has touched anything. */
if (unlikely (!buffer->ensure (count + extra_glyphs_needed)))
break;
}
else
{
/* The CUT pass must have produced exactly new_len glyphs. */
assert (j == 0);
buffer->len = new_len;
}
}
}
/* postprocess_glyphs callback of the Arabic complex shaper.
 *
 * Applies the 'stch' stretching and only then releases the
 * arabic_shaping_action buffer variable: apply_stch() still reads that
 * variable, so the deallocation has to come last. */
static void
postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
			   hb_buffer_t *buffer,
			   hb_font_t *font)
{
  /* Stretch first ... */
  apply_stch (plan, buffer, font);

  /* ... then give the per-glyph scratch byte back to the buffer. */
  HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
}
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
{
@ -380,7 +587,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
data_create_arabic,
data_destroy_arabic,
NULL, /* preprocess_text */
NULL, /* postprocess_glyphs */
postprocess_glyphs_arabic,
HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT,
NULL, /* decompose */
NULL, /* compose */

View File

@ -362,5 +362,24 @@ extern HB_INTERNAL const hb_unicode_funcs_t _hb_unicode_funcs_nil;
(FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
/* Non-zero when gen_cat is one of the general categories treated as part
 * of a "word": every letter, mark, number and symbol category, plus
 * unassigned and private-use code points.  Punctuation, separator and the
 * remaining "other" categories are not included. */
#define HB_UNICODE_GENERAL_CATEGORY_IS_WORD(gen_cat) \
	(FLAG_SAFE (gen_cat) & \
	 (/* Letters */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \
	  /* Marks */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
	  /* Numbers */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \
	  /* Symbols */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL) | \
	  /* Unassigned and private use */ \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \
	  FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE)))
#endif /* HB_UNICODE_PRIVATE_HH */

View File

@ -1 +1,2 @@
abbreviation-mark.txt
alaph.txt

View File

@ -0,0 +1,11 @@
ܐܒ
ܐ܏
ܐ܏ܒ
ܐ܏ܒܓ
ܐ܏ܒܓܕ
ܐ܏ܒܓܕܐ
ܐ܏ܒܓܕܐܐܐܐܐܐܐܐܐ
ܐ܏ܒܓܕܓܓܓܓܓܓ
ܐ܏ܒܓ