harfbuzz/src/hb-ot-shape-complex-misc.cc

/*
 * Copyright © 2010  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#include "hb-ot-shape-complex-private.hh"


/* TODO Add kana, and other small shapers here */

/* When adding trivial shapers, eg. kana, hangul, etc, we can either
 * add a full shaper enum value for them, or switch on the script in
 * the default complex shaper.  The former is faster, so I think that's
 * what we would do, and hence the default complex shaper shall remain
 * empty.
 */

/* Feature-collection hook of the default complex shaper.
 *
 * Deliberately a no-op: neither `map` nor `props` is read or modified.
 */
void
_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t           *map,
                                               const hb_segment_properties_t *props)
{
  /* Nothing to collect. */
}

/* Normalization-mode hook of the default complex shaper.
 *
 * Always returns HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS.
 */
hb_ot_shape_normalization_mode_t
_hb_ot_shape_complex_normalization_preference_default (void)
{
  const hb_ot_shape_normalization_mode_t preferred_mode =
    HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;

  return preferred_mode;
}

/* Mask-setup hook of the default complex shaper.
 *
 * Deliberately a no-op: `map`, `buffer` and `font` are left untouched.
 */
void
_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t *map,
                                          hb_buffer_t *buffer,
                                          hb_font_t   *font)
{
  /* Nothing to set up. */
}


/* Hangul shaper */

/* Feature-collection hook of the Hangul shaper.
 *
 * Currently a no-op: neither `map` nor `props` is read or modified.
 */
void
_hb_ot_shape_complex_collect_features_hangul (hb_ot_map_builder_t           *map,
                                              const hb_segment_properties_t *props)
{
  /* Nothing to collect. */
}

/* Normalization-mode hook of the Hangul shaper.
 *
 * Always returns HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL.
 */
hb_ot_shape_normalization_mode_t
_hb_ot_shape_complex_normalization_preference_hangul (void)
{
  const hb_ot_shape_normalization_mode_t preferred_mode =
    HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;

  return preferred_mode;
}

/* Mask-setup hook of the Hangul shaper.
 *
 * Currently a no-op: `map`, `buffer` and `font` are left untouched.
 */
void
_hb_ot_shape_complex_setup_masks_hangul (hb_ot_map_t *map,
                                         hb_buffer_t *buffer,
                                         hb_font_t   *font)
{
  /* Nothing to set up. */
}


/* Thai / Lao shaper */

/* Feature-collection hook of the Thai / Lao shaper.
 *
 * Currently a no-op: neither `map` nor `props` is read or modified.
 */
void
_hb_ot_shape_complex_collect_features_thai (hb_ot_map_builder_t           *map,
                                            const hb_segment_properties_t *props)
{
  /* Nothing to collect. */
}

/* Normalization-mode hook of the Thai / Lao shaper.
 *
 * Always returns HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL.
 */
hb_ot_shape_normalization_mode_t
_hb_ot_shape_complex_normalization_preference_thai (void)
{
  const hb_ot_shape_normalization_mode_t preferred_mode =
    HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;

  return preferred_mode;
}

/* Mask-setup hook of the Thai / Lao shaper.
 *
 * Walks the buffer once.  Every SARA AM is replaced by the pair
 * <nikhahit, sara aa>, and the nikhahit is then moved backwards in the
 * output over any run of tone marks that directly precedes it.  The
 * affected output range is finally merged into a single cluster.
 *
 * `map` and `font` are not used.  If the buffer reports an error after the
 * replacement, the function returns immediately.
 */
void
_hb_ot_shape_complex_setup_masks_thai (hb_ot_map_t *map, hb_buffer_t *buffer, hb_font_t *font)
{
  /* None of this is specified in the MS OT Thai spec; however, it seems to
   * be what Uniscribe and other engines implement.  According to Eric Muller:
   *
   * When you have a sara am, decompose it in nikhahit + sara a, *and* move
   * the nikhahit backwards over any *tone* mark (0E48-0E4B).
   *
   * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>
   *
   * This reordering is legit only when the nikhahit comes from a sara am,
   * not when it's there to start with.  The string <0E14, 0E4B, 0E4D> is
   * probably not what a user wanted, but the rendering is nevertheless
   * nikhahit above chattawa.
   *
   * Same for Lao.
   */

  /*
   * Characters of significance:
   *
   *              Thai    Lao
   * SARA AM:     U+0E33  U+0EB3
   * SARA AA:     U+0E32  U+0EB2
   * Nikhahit:    U+0E4D  U+0ECD
   *
   * Tone marks:
   * Thai:  <0E48..0E4B> CCC=107
   * Lao:   <0EC8..0ECB> CCC=122
   *
   * Each Lao code point equals its Thai counterpart + 0x80, which is why
   * the macros below simply mask out bit 0x0080.
   */

  /* We only get one script at a time, so a script-agnostic implementation
   * is adequate here. */
#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)
#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)
#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)
#define IS_TONE_MARK(x) (((x) & ~0x0083) == 0x0E48)

  buffer->clear_output ();

  const unsigned int input_len = buffer->len;
  buffer->idx = 0;
  while (buffer->idx < input_len)
  {
    /* Common case: anything that is not SARA AM is passed along as-is. */
    if (likely (!IS_SARA_AM (buffer->info[buffer->idx].codepoint)))
    {
      buffer->next_glyph ();
      continue;
    }

    /* SARA AM: emit <nikhahit, sara aa> in its place. */
    uint16_t pair[2];
    pair[0] = NIKHAHIT_FROM_SARA_AM (buffer->info[buffer->idx].codepoint);
    pair[1] = SARA_AA_FROM_SARA_AM (buffer->info[buffer->idx].codepoint);
    buffer->replace_glyphs (1, 2, pair);
    if (unlikely (buffer->in_error))
      return;

    /* The code from here on treats the last two output slots as the pair
     * just emitted.  Scan backwards from the nikhahit over the run of tone
     * marks that immediately precedes it. */
    const unsigned int nikhahit_pos = buffer->out_len - 2;
    unsigned int first = nikhahit_pos;
    while (first > 0 && IS_TONE_MARK (buffer->out_info[first - 1].codepoint))
      first--;

    /* Rotate the nikhahit in front of that run: shift the tone marks one
     * slot to the right, then drop the saved nikhahit into the gap.  When
     * there are no tone marks this moves zero bytes and rewrites the
     * nikhahit in place. */
    hb_glyph_info_t nikhahit = buffer->out_info[nikhahit_pos];
    memmove (buffer->out_info + first + 1,
             buffer->out_info + first,
             (nikhahit_pos - first) * sizeof (buffer->out_info[0]));
    buffer->out_info[first] = nikhahit;

    /* Grow the range to the left over output glyphs that already share the
     * cluster value of the glyph now at `first`... */
    while (first > 0 &&
           buffer->out_info[first - 1].cluster == buffer->out_info[first].cluster)
      first--;

    /* ...and to the right by copying over input glyphs whose cluster equals
     * that of the last output glyph. */
    while (buffer->idx < input_len &&
           buffer->info[buffer->idx].cluster == buffer->out_info[buffer->out_len - 1].cluster)
      buffer->next_glyph ();

    /* Merge everything in [first, out_len) into one cluster. */
    buffer->merge_out_clusters (first, buffer->out_len);
  }

  buffer->swap_buffers ();
}
Minor 2011-07-08 04:35:17 +02:00			`/*`
			`* Copyright © 2010 Google, Inc.`
			`*`
			`* This is part of HarfBuzz, a text shaping library.`
			`*`
			`* Permission is hereby granted, without written agreement and without`
			`* license or royalty fees, to use, copy, modify, and distribute this`
			`* software and its documentation for any purpose, provided that the`
			`* above copyright notice and the following two paragraphs appear in`
			`* all copies of this software.`
			`*`
			`* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR`
			`* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES`
			`* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN`
			`* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH`
			`* DAMAGE.`
			`*`
			`* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,`
			`* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND`
			`* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS`
			`* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO`
			`* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.`
			`*`
			`* Google Author(s): Behdad Esfahbod`
			`*/`

			`#include "hb-ot-shape-complex-private.hh"`


Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00			`/* TODO Add kana, and other small shapers here */`
Minor 2011-07-08 04:35:17 +02:00
			`/* When adding trivial shapers, eg. kana, hangul, etc, we can either`
			`* add a full shaper enum value for them, or switch on the script in`
			`* the default complex shaper. The former is faster, so I think that's`
			`* what we would do, and hence the default complex shaper shall remain`
			`* empty.`
			`*/`

			`void`
			`_hb_ot_shape_complex_collect_features_default (hb_ot_map_builder_t map, const hb_segment_properties_t props)`
			`{`
			`}`

Add normalize mode In preparation for Hangul shaper. 2012-04-05 23:25:19 +02:00			`hb_ot_shape_normalization_mode_t`
			`_hb_ot_shape_complex_normalization_preference_default (void)`
Add prefer_decomposed() complex-shaper callback This allows the Indic shaper to request decomposed characters. This will handle split matra for free. Other shapers prefer precomposed characters. 2011-07-21 18:23:12 +02:00			`{`
Add normalize mode In preparation for Hangul shaper. 2012-04-05 23:25:19 +02:00			`return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS;`
Add prefer_decomposed() complex-shaper callback This allows the Indic shaper to request decomposed characters. This will handle split matra for free. Other shapers prefer precomposed characters. 2011-07-21 18:23:12 +02:00			`}`

Minor 2011-07-08 04:35:17 +02:00			`void`
In Arabic fallback shaping, check that the font has glyph for new char 2012-04-11 00:02:20 +02:00			`_hb_ot_shape_complex_setup_masks_default (hb_ot_map_t map, hb_buffer_t buffer, hb_font_t *font)`
Minor 2011-07-08 04:35:17 +02:00			`{`
			`}`


Add simple Hangul shaper that recomposes Jamo when feasible Previously, we were NOT actually recomposing Hangul Jamo. We do now. The two lines in: test/shaping/texts/in-tree/shaper-default/script-hangul/misc/misc.txt Now render the same with the UnDotum.ttf font. Previously the second linle was rendering boxes. We can also start applying OpenType Jamo features later. At this time, I have no idea how the 'ljmo', 'vjmo', 'tjmo' features are supposed to work. Maybe someone can explain them to me? 2012-04-07 21:06:55 +02:00
Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00			`/* Hangul shaper */`

Add simple Hangul shaper that recomposes Jamo when feasible Previously, we were NOT actually recomposing Hangul Jamo. We do now. The two lines in: test/shaping/texts/in-tree/shaper-default/script-hangul/misc/misc.txt Now render the same with the UnDotum.ttf font. Previously the second linle was rendering boxes. We can also start applying OpenType Jamo features later. At this time, I have no idea how the 'ljmo', 'vjmo', 'tjmo' features are supposed to work. Maybe someone can explain them to me? 2012-04-07 21:06:55 +02:00			`void`
			`_hb_ot_shape_complex_collect_features_hangul (hb_ot_map_builder_t map, const hb_segment_properties_t props)`
			`{`
			`}`

			`hb_ot_shape_normalization_mode_t`
			`_hb_ot_shape_complex_normalization_preference_hangul (void)`
			`{`
			`return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;`
			`}`

			`void`
In Arabic fallback shaping, check that the font has glyph for new char 2012-04-11 00:02:20 +02:00			`_hb_ot_shape_complex_setup_masks_hangul (hb_ot_map_t map, hb_buffer_t buffer, hb_font_t *font)`
Add simple Hangul shaper that recomposes Jamo when feasible Previously, we were NOT actually recomposing Hangul Jamo. We do now. The two lines in: test/shaping/texts/in-tree/shaper-default/script-hangul/misc/misc.txt Now render the same with the UnDotum.ttf font. Previously the second linle was rendering boxes. We can also start applying OpenType Jamo features later. At this time, I have no idea how the 'ljmo', 'vjmo', 'tjmo' features are supposed to work. Maybe someone can explain them to me? 2012-04-07 21:06:55 +02:00			`{`
			`}`
Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00


			`/* Thai / Lao shaper */`

			`void`
			`_hb_ot_shape_complex_collect_features_thai (hb_ot_map_builder_t map, const hb_segment_properties_t props)`
			`{`
			`}`

			`hb_ot_shape_normalization_mode_t`
			`_hb_ot_shape_complex_normalization_preference_thai (void)`
			`{`
			`return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL;`
			`}`

			`void`
In Arabic fallback shaping, check that the font has glyph for new char 2012-04-11 00:02:20 +02:00			`_hb_ot_shape_complex_setup_masks_thai (hb_ot_map_t map, hb_buffer_t buffer, hb_font_t *font)`
Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00			`{`
			`/* The following is NOT specified in the MS OT Thai spec, however, it seems`
			`* to be what Uniscribe and other engines implement. According to Eric Muller:`
			`*`
			`* When you have a sara am, decompose it in nikhahit + sara a, and move the`
			`* nikhahit backwards over any tone mark (0E48-0E4B).`
			`*`
			`* <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32>`
			`*`
			`* This reordering is legit only when the nikhahit comes from a sara am, not`
			`* when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably`
			`* not what a user wanted, but the rendering is nevertheless nikhahit above`
			`* chattawa.`
			`*`
			`* Same for Lao.`
			`*/`

			`/*`
			`* Here are the characters of significance:`
			`*`
			`* Thai Lao`
			`* SARA AM: U+0E33 U+0EB3`
			`* SARA AA: U+0E32 U+0EB2`
			`* Nikhahit: U+0E4D U+0ECD`
			`*`
			`* Tone marks:`
			`* Thai: <0E48..0E4B> CCC=107`
			`* Lao: <0EC8..0ECB> CCC=122`
			`*`
			`* Note how the Lao versions are the same as Thai + 0x80.`
			`*/`

			`/* We only get one script at a time, so a script-agnostic implementation`
			`* is adequate here. */`
			`#define IS_SARA_AM(x) (((x) & ~0x0080) == 0x0E33)`
			`#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0xE33 + 0xE4D)`
			`#define SARA_AA_FROM_SARA_AM(x) ((x) - 1)`
			`#define IS_TONE_MARK(x) (((x) & ~0x0083) == 0x0E48)`

			`buffer->clear_output ();`
			`unsigned int count = buffer->len;`
			`for (buffer->idx = 0; buffer->idx < count;)`
			`{`
Implement Arabic fallback shaping mandatory ligatures 2012-04-10 23:20:05 +02:00			`if (likely (!IS_SARA_AM (buffer->info[buffer->idx].codepoint))) {`
Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00			`buffer->next_glyph ();`
			`continue;`
			`}`

			`/* Is SARA AM. Decompose and reorder. */`
			`uint16_t decomposed[2] = {NIKHAHIT_FROM_SARA_AM (buffer->info[buffer->idx].codepoint),`
			`SARA_AA_FROM_SARA_AM (buffer->info[buffer->idx].codepoint)};`
			`buffer->replace_glyphs (1, 2, decomposed);`
Implement Arabic fallback shaping mandatory ligatures 2012-04-10 23:20:05 +02:00			`if (unlikely (buffer->in_error))`
Add Thai shaper that does SARA AM decomposition / reordering That's not in the OpenType spec, but it's what MS and Adobe do. 2012-04-10 16:52:07 +02:00			`return;`

			`/* Ok, let's see... */`
			`unsigned int end = buffer->out_len;`
			`unsigned int start = end - 2;`
			`while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint))`
			`start--;`

			`/* Move Nikhahit (end-2) to the beginning */`
			`hb_glyph_info_t t = buffer->out_info[end - 2];`
			`memmove (buffer->out_info + start + 1,`
			`buffer->out_info + start,`
			`sizeof (buffer->out_info[0]) * (end - start - 2));`
			`buffer->out_info[start] = t;`

			`/* Make cluster */`
			`for (; start > 0 && buffer->out_info[start - 1].cluster == buffer->out_info[start].cluster; start--)`
			`;`
			`for (; buffer->idx < count;)`
			`if (buffer->info[buffer->idx].cluster == buffer->out_info[buffer->out_len - 1].cluster)`
			`buffer->next_glyph ();`
			`else`
			`break;`
			`end = buffer->out_len;`

			`buffer->merge_out_clusters (start, end);`
			`}`
			`buffer->swap_buffers ();`
			`}`