[arabic] Implement Unicode Arabic Mark Ordering Algorithm UTR#53

Fixes https://github.com/behdad/harfbuzz/issues/509
This commit is contained in:
Behdad Esfahbod 2017-10-04 14:47:10 +02:00
parent b6fe0ab636
commit ab8d70ec70
16 changed files with 106 additions and 3 deletions

View File

@ -613,6 +613,80 @@ postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action); HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action);
} }
/* Modifier Combining Marks (MCM) used by the Unicode Arabic Mark Ordering
 * Algorithm:
 * http://www.unicode.org/reports/tr53/tr53-1.pdf
 *
 * Read-only lookup table; info_is_mcm() searches it linearly. */
static const hb_codepoint_t
modifier_combining_marks[] =
{
  0x0654u, /* ARABIC HAMZA ABOVE */
  0x0655u, /* ARABIC HAMZA BELOW */
  0x0658u, /* ARABIC MARK NOON GHUNNA */
  0x06DCu, /* ARABIC SMALL HIGH SEEN */
  0x06E3u, /* ARABIC SMALL LOW SEEN */
  0x06E7u, /* ARABIC SMALL HIGH YEH */
  0x06E8u, /* ARABIC SMALL HIGH NOON */
  0x08F3u, /* ARABIC SMALL HIGH WAW */
};
static inline bool
info_is_mcm (const hb_glyph_info_t &info)
{
hb_codepoint_t u = info.codepoint;
for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++)
if (u == modifier_combining_marks[i])
return true;
return false;
}
static void
reorder_marks_arabic (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
unsigned int start,
unsigned int end)
{
hb_glyph_info_t *info = buffer->info;
unsigned int i = start;
for (unsigned int cc = 220; cc <= 230; cc += 10)
{
DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d\n", cc, i);
while (i < end && info_cc(info[i]) < cc)
i++;
DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d\n", cc, i);
if (i == end)
break;
if (info_cc(info[i]) > cc)
continue;
/* Technically we should also check "info_cc(info[j]) == cc"
* in the following loop. But not doing it is safe; we might
* end up moving all the 220 MCMs and 230 MCMs together in one
* move and be done. */
unsigned int j = i;
while (j < end && info_is_mcm (info[j]))
j++;
DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d\n", cc, i, j);
if (i == j)
continue;
/* Shift it! */
DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d\n", cc, i, j);
hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS];
assert (j - i <= ARRAY_LENGTH (temp));
buffer->merge_out_clusters (start, j);
memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
start += j - i;
i = j;
}
}
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic = const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
{ {
"arabic", "arabic",
@ -627,6 +701,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic =
NULL, /* compose */ NULL, /* compose */
setup_masks_arabic, setup_masks_arabic,
NULL, /* disable_otl */ NULL, /* disable_otl */
reorder_marks_arabic,
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */ true, /* fallback_position */
}; };

View File

@ -41,6 +41,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default =
NULL, /* compose */ NULL, /* compose */
NULL, /* setup_masks */ NULL, /* setup_masks */
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */ true, /* fallback_position */
}; };

View File

@ -426,6 +426,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
NULL, /* compose */ NULL, /* compose */
setup_masks_hangul, setup_masks_hangul,
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */ false, /* fallback_position */
}; };

View File

@ -181,6 +181,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew =
compose_hebrew, compose_hebrew,
NULL, /* setup_masks */ NULL, /* setup_masks */
disable_otl_hebrew, disable_otl_hebrew,
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */ true, /* fallback_position */
}; };

View File

@ -1840,6 +1840,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
compose_indic, compose_indic,
setup_masks_indic, setup_masks_indic,
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
false, /* fallback_position */ false, /* fallback_position */
}; };

View File

@ -524,6 +524,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_old =
NULL, /* compose */ NULL, /* compose */
NULL, /* setup_masks */ NULL, /* setup_masks */
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */ true, /* fallback_position */
}; };
@ -542,6 +543,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar =
NULL, /* compose */ NULL, /* compose */
setup_masks_myanmar, setup_masks_myanmar,
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
false, /* fallback_position */ false, /* fallback_position */
}; };

View File

@ -39,6 +39,8 @@
#define complex_var_u8_1() var2.u8[3] #define complex_var_u8_1() var2.u8[3]
/* Longest run of combining marks that the normalizer will sort and hand to
 * a shaper's reorder_marks() callback; longer runs are left untouched.
 * Also sizes the temporary array in the Arabic reorder_marks_arabic(). */
#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32
enum hb_ot_shape_zero_width_marks_type_t { enum hb_ot_shape_zero_width_marks_type_t {
HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
@ -154,6 +156,16 @@ struct hb_ot_complex_shaper_t
*/ */
bool (*disable_otl) (const hb_ot_shape_plan_t *plan); bool (*disable_otl) (const hb_ot_shape_plan_t *plan);
/* reorder_marks()
 * Called during shape().
 * Shapers can use this to modify the ordering of combining marks.
 * The normalizer calls it on a run of combining marks right after the
 * run has been sorted by combining class; start is the index of the
 * first mark of the run in the buffer and end is one past the last.
 * May be NULL.
 */
void (*reorder_marks) (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
unsigned int start,
unsigned int end);
hb_ot_shape_zero_width_marks_type_t zero_width_marks; hb_ot_shape_zero_width_marks_type_t zero_width_marks;
bool fallback_position; bool fallback_position;

View File

@ -378,6 +378,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai =
NULL, /* compose */ NULL, /* compose */
NULL, /* setup_masks */ NULL, /* setup_masks */
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
false,/* fallback_position */ false,/* fallback_position */
}; };

View File

@ -58,6 +58,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_tibetan =
NULL, /* compose */ NULL, /* compose */
NULL, /* setup_masks */ NULL, /* setup_masks */
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE,
true, /* fallback_position */ true, /* fallback_position */
}; };

View File

@ -607,6 +607,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
compose_use, compose_use,
setup_masks_use, setup_masks_use,
NULL, /* disable_otl */ NULL, /* disable_otl */
NULL, /* reorder_marks */
HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY,
false, /* fallback_position */ false, /* fallback_position */
}; };

View File

@ -345,14 +345,18 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan,
if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0)
break; break;
/* We are going to do a O(n^2). Only do this if the sequence is short. */ /* We are going to do a O(n^2). Only do this if the sequence is short,
if (end - i > 10) { * but not too short ;). */
if (end - i < 2 || end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) {
i = end; i = end;
continue; continue;
} }
buffer->sort (i, end, compare_combining_class); buffer->sort (i, end, compare_combining_class);
if (plan->shaper->reorder_marks)
plan->shaper->reorder_marks (plan, buffer, i, end);
i = end; i = end;
} }

View File

@ -44,6 +44,7 @@ TESTS = \
tests/arabic-fallback-shaping.tests \ tests/arabic-fallback-shaping.tests \
tests/arabic-feature-order.tests \ tests/arabic-feature-order.tests \
tests/arabic-like-joining.tests \ tests/arabic-like-joining.tests \
tests/arabic-mark-order.tests \
tests/automatic-fractions.tests \ tests/automatic-fractions.tests \
tests/cluster.tests \ tests/cluster.tests \
tests/color-fonts.tests \ tests/color-fonts.tests \

View File

@ -43,7 +43,7 @@ if test $? != 0; then
fi fi
cp "$fontfile" "$dir/font.ttf" cp "$fontfile" "$dir/font.ttf"
pyftsubset \ fonttools subset \
--glyph-names \ --glyph-names \
--no-hinting \ --no-hinting \
--layout-features='*' \ --layout-features='*' \

View File

@ -0,0 +1,2 @@
fonts/sha1sum/94a5d6fb15a27521fba9ea4aee9cb39b2d03322a.ttf::U+064A,U+064E,U+0670,U+0653,U+0640,U+0654,U+064E,U+0627:[afii57415.zz04=7+481|afii57454=4@25,975+0|uni0654=4@-50,50+0|afii57440=4+650|uni0670_uni0653=0@75,400+0|afii57454=0@750,1125+0|afii57450.calt=0+1331]
fonts/sha1sum/24b8d24d00ae86f49791b746da4c9d3f717a51a8.ttf::U+0628,U+0618,U+0619,U+064E,U+064F,U+0654,U+0658,U+0653,U+0654,U+0651,U+0656,U+0651,U+065C,U+0655,U+0650:[uni0653.small=0@266,2508+0|uni0654=0@308,2151+0|uni0655=0@518,-1544+0|uni065C=0@501,-1453+0|uni0656=0@573,-659+0|uni0650=0@500,133+0|uni0619=0@300,1807+0|uni0618=0@357,1674+0|uni0651064E=0@387,1178+0|uni0651=0@402,764+0|uni0658=0@424,404+0|uni0654064F=0@540,-435+0|uni0628=0+1352]