[myanmar] Remove duplication of categories in the Myanmar shaper

This commit is contained in:
Behdad Esfahbod 2022-06-05 01:52:31 -06:00
parent ce0528c0ff
commit 899ca24387
6 changed files with 181 additions and 223 deletions

View File

@ -145,7 +145,6 @@ HB_BASE_sources = \
hb-ot-shaper-indic.hh \
hb-ot-shaper-khmer.cc \
hb-ot-shaper-myanmar.cc \
hb-ot-shaper-myanmar.hh \
hb-ot-shaper-syllabic.cc \
hb-ot-shaper-syllabic.hh \
hb-ot-shaper-thai.cc \

View File

@ -31,6 +31,18 @@
#include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
/* The Myanmar accessors below simply expand to the Indic accessors. */
#define myanmar_category() indic_category() /* myanmar_category_t */
#define myanmar_position() indic_position() /* myanmar_position_t */
/* Categories are plain unsigned values; positions reuse the Indic position type. */
using myanmar_category_t = unsigned;
using myanmar_position_t = indic_position_t;
/* M_Cat(X) expands to myanmar_syllable_machine_ex_X, i.e. one of the
 * category constants #defined further down in this file. */
#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat
enum myanmar_syllable_type_t {
myanmar_consonant_syllable,
myanmar_punctuation_cluster,
@ -39,7 +51,7 @@ enum myanmar_syllable_type_t {
};
#line 43 "hb-ot-shaper-myanmar-machine.hh"
#line 55 "hb-ot-shaper-myanmar-machine.hh"
#define myanmar_syllable_machine_ex_A 9u
#define myanmar_syllable_machine_ex_As 18u
#define myanmar_syllable_machine_ex_C 1u
@ -68,7 +80,7 @@ enum myanmar_syllable_type_t {
#define myanmar_syllable_machine_ex_ZWNJ 5u
#line 72 "hb-ot-shaper-myanmar-machine.hh"
#line 84 "hb-ot-shaper-myanmar-machine.hh"
static const unsigned char _myanmar_syllable_machine_trans_keys[] = {
1u, 32u, 3u, 32u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u,
3u, 32u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 32u, 1u, 15u, 3u, 32u, 3u, 32u,
@ -350,11 +362,11 @@ static const int myanmar_syllable_machine_error = -1;
static const int myanmar_syllable_machine_en_main = 0;
#line 44 "hb-ot-shaper-myanmar-machine.rl"
#line 56 "hb-ot-shaper-myanmar-machine.rl"
#line 102 "hb-ot-shaper-myanmar-machine.rl"
#line 114 "hb-ot-shaper-myanmar-machine.rl"
#define found_syllable(syllable_type) \
@ -373,7 +385,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
int cs;
hb_glyph_info_t *info = buffer->info;
#line 377 "hb-ot-shaper-myanmar-machine.hh"
#line 389 "hb-ot-shaper-myanmar-machine.hh"
{
cs = myanmar_syllable_machine_start;
ts = 0;
@ -381,7 +393,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
act = 0;
}
#line 122 "hb-ot-shaper-myanmar-machine.rl"
#line 134 "hb-ot-shaper-myanmar-machine.rl"
p = 0;
@ -389,7 +401,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
unsigned int syllable_serial = 1;
#line 393 "hb-ot-shaper-myanmar-machine.hh"
#line 405 "hb-ot-shaper-myanmar-machine.hh"
{
int _slen;
int _trans;
@ -403,7 +415,7 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
#line 407 "hb-ot-shaper-myanmar-machine.hh"
#line 419 "hb-ot-shaper-myanmar-machine.hh"
}
_keys = _myanmar_syllable_machine_trans_keys + (cs<<1);
@ -422,38 +434,38 @@ _eof_trans:
switch ( _myanmar_syllable_machine_trans_actions[_trans] ) {
case 6:
#line 94 "hb-ot-shaper-myanmar-machine.rl"
#line 106 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_consonant_syllable); }}
break;
case 4:
#line 95 "hb-ot-shaper-myanmar-machine.rl"
#line 107 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
case 10:
#line 96 "hb-ot-shaper-myanmar-machine.rl"
#line 108 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_punctuation_cluster); }}
break;
case 8:
#line 97 "hb-ot-shaper-myanmar-machine.rl"
#line 109 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_broken_cluster); }}
break;
case 3:
#line 98 "hb-ot-shaper-myanmar-machine.rl"
#line 110 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
case 5:
#line 94 "hb-ot-shaper-myanmar-machine.rl"
#line 106 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_consonant_syllable); }}
break;
case 7:
#line 97 "hb-ot-shaper-myanmar-machine.rl"
#line 109 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_broken_cluster); }}
break;
case 9:
#line 98 "hb-ot-shaper-myanmar-machine.rl"
#line 110 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
#line 457 "hb-ot-shaper-myanmar-machine.hh"
#line 469 "hb-ot-shaper-myanmar-machine.hh"
}
_again:
@ -462,7 +474,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
#line 466 "hb-ot-shaper-myanmar-machine.hh"
#line 478 "hb-ot-shaper-myanmar-machine.hh"
}
if ( ++p != pe )
@ -478,7 +490,7 @@ _again:
}
#line 130 "hb-ot-shaper-myanmar-machine.rl"
#line 142 "hb-ot-shaper-myanmar-machine.rl"
}

View File

@ -29,6 +29,18 @@
#include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
/* The Myanmar accessors below simply expand to the Indic accessors. */
#define myanmar_category() indic_category() /* myanmar_category_t */
#define myanmar_position() indic_position() /* myanmar_position_t */
/* Categories are plain unsigned values; positions reuse the Indic position type. */
using myanmar_category_t = unsigned;
using myanmar_position_t = indic_position_t;
/* M_Cat(X) expands to myanmar_syllable_machine_ex_X.
 * NOTE(review): these names are presumably emitted by Ragel from the
 * `export` statements in the machine definition below -- confirm. */
#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat
enum myanmar_syllable_type_t {
myanmar_consonant_syllable,
myanmar_punctuation_cluster,
@ -46,30 +58,30 @@ enum myanmar_syllable_type_t {
%%{
export A = 9;
export As = 18;
export As = 18; # Asat
export C = 1;
export D = 10;
export D0 = 20;
export DB = 3;
export GB = 10;
export D = 10; # Digits except zero = GB
export D0 = 20; # Digit zero
export DB = 3; # Dot below = OT_N
export GB = 10; # = OT_PLACEHOLDER
export H = 4;
export IV = 2;
export MH = 21;
export ML = 32;
export MR = 22;
export MW = 23;
export MY = 24;
export PT = 25;
export V = 8;
export MH = 21; # Medial
export MR = 22; # Medial
export MW = 23; # Medial
export MY = 24; # Medial
export ML = 32; # Consonant medials
export PT = 25; # Pwo and other tones
export V = 8; # Visarga and Shan tones
export VAbv = 26;
export VBlw = 27;
export VPre = 28;
export VPst = 29;
export VS = 30;
export VS = 30; # Variation selectors
export ZWJ = 6;
export ZWNJ = 5;
export Ra = 15;
export P = 31;
export P = 31; # Punctuation
export CS = 19;
j = ZWJ|ZWNJ; # Joiners

View File

@ -28,8 +28,9 @@
#ifndef HB_NO_OT_SHAPE
#include "hb-ot-shaper-myanmar.hh"
#include "hb-ot-shaper-myanmar-machine.hh"
#include "hb-ot-shaper-indic.hh"
#include "hb-ot-layout.hh"
/*
@ -62,6 +63,118 @@ myanmar_other_features[] =
HB_TAG('p','s','t','s'),
};
/* Classify one glyph for the Myanmar shaper.
 *
 * Starts from the packed value returned by hb_indic_get_categories()
 * (category in the low byte, position in the bits above it), applies the
 * Myanmar-specific per-codepoint overrides, splits the generic matra
 * category (OT_M) by position, and stores the result through the glyph's
 * myanmar_category() / myanmar_position() accessors. */
static inline void
set_myanmar_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t cp = info.codepoint;
  const unsigned int packed = hb_indic_get_categories (cp);
  unsigned int category = packed & 0xFFu;
  myanmar_position_t position = (myanmar_position_t) (packed >> 8);

  /* Myanmar
   * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze
   */

  /* U+FE00..U+FE0F; none of these codepoints appears in the switch below. */
  if (unlikely (hb_in_range<hb_codepoint_t> (cp, 0xFE00u, 0xFE0Fu)))
    category = M_Cat(VS);

  /* Per-codepoint overrides of the looked-up category. */
  switch (cp)
  {
    case 0x104Eu: /* The spec says C, IndicSyllableCategory doesn't have. */
    case 0xAA74u: case 0xAA75u: case 0xAA76u: /* https://github.com/harfbuzz/harfbuzz/issues/218 */
      category = M_Cat(C);
      break;

    case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u:
    case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u:
    case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu:
    case 0x25FEu:
      category = M_Cat(GB);
      break;

    case 0x1004u: case 0x101Bu: case 0x105Au:
      category = M_Cat(Ra);
      break;

    case 0x1032u: case 0x1036u:
      category = M_Cat(A);
      break;

    case 0x1039u:
      category = M_Cat(H);
      break;

    case 0x103Au:
      category = M_Cat(As);
      break;

    case 0x1040u: /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
    case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u:
    case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u:
    case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u:
    case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u:
    case 0x1097u: case 0x1098u: case 0x1099u:
      category = M_Cat(D);
      break;

    case 0x103Eu:
      category = M_Cat(MH);
      break;

    case 0x1060u:
      category = M_Cat(ML);
      break;

    case 0x103Cu:
      category = M_Cat(MR);
      break;

    case 0x103Du: case 0x1082u:
      category = M_Cat(MW);
      break;

    case 0x103Bu: case 0x105Eu: case 0x105Fu:
      category = M_Cat(MY);
      break;

    case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au:
    case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu:
      category = M_Cat(PT);
      break;

    case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u:
    case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du:
    case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu:
      category = M_Cat(V);
      break;

    case 0x104Au: case 0x104Bu:
      category = M_Cat(P);
      break;

    default:
      break;
  }

  /* Split the generic matra category by position; any other position
   * leaves the category as OT_M. */
  if (category == OT_M)
  {
    if (position == POS_PRE_C)
    {
      category = M_Cat(VPre);
      position = POS_PRE_M;
    }
    else if (position == POS_ABOVE_C)
      category = M_Cat(VAbv);
    else if (position == POS_BELOW_C)
      category = M_Cat(VBlw);
    else if (position == POS_POST_C)
      category = M_Cat(VPst);
  }

  info.myanmar_category() = category;
  info.myanmar_position() = position;
}
static void
setup_syllables_myanmar (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
@ -150,9 +263,9 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer,
{
unsigned int limit = start;
if (start + 3 <= end &&
info[start ].myanmar_category() == OT_Ra &&
info[start+1].myanmar_category() == OT_As &&
info[start+2].myanmar_category() == OT_H)
info[start ].myanmar_category() == M_Cat(Ra) &&
info[start+1].myanmar_category() == M_Cat(As) &&
info[start+2].myanmar_category() == M_Cat(H))
{
limit += 3;
base = start;
@ -189,7 +302,7 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer,
* Myanmar reordering! */
for (; i < end; i++)
{
if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */
if (info[i].myanmar_category() == M_Cat(MR)) /* Pre-base reordering */
{
info[i].myanmar_position() = POS_PRE_C;
continue;
@ -198,30 +311,30 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer,
{
continue;
}
if (info[i].myanmar_category() == OT_VS)
if (info[i].myanmar_category() == M_Cat(VS))
{
info[i].myanmar_position() = info[i - 1].myanmar_position();
continue;
}
if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw)
if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == M_Cat(VBlw))
{
pos = POS_BELOW_C;
info[i].myanmar_position() = pos;
continue;
}
if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A)
if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(A))
{
info[i].myanmar_position() = POS_BEFORE_SUB;
continue;
}
if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw)
if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(VBlw))
{
info[i].myanmar_position() = pos;
continue;
}
if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A)
if (pos == POS_BELOW_C && info[i].myanmar_category() != M_Cat(A))
{
pos = POS_AFTER_SUB;
info[i].myanmar_position() = pos;
@ -264,7 +377,7 @@ reorder_myanmar (const hb_ot_shape_plan_t *plan,
{
hb_syllabic_insert_dotted_circles (font, buffer,
myanmar_broken_cluster,
OT_GB);
M_Cat(GB));
foreach_syllable (buffer, start, end)
reorder_syllable_myanmar (plan, font->face, buffer, start, end);

View File

@ -1,177 +0,0 @@
/*
* Copyright © 2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPER_MYANMAR_HH
#define HB_OT_SHAPER_MYANMAR_HH
#include "hb.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
#define myanmar_category() indic_category() /* myanmar_category_t */
#define myanmar_position() indic_position() /* myanmar_position_t */
/* Myanmar-specific syllable categories.
 * Note: This enum is duplicated in the -machine.rl source file.
 * Not sure how to avoid duplication. */
enum myanmar_category_t {
  /* Aliases of categories declared outside this enum. */
  OT_DB = OT_N,           /* Dot below */
  OT_GB = OT_PLACEHOLDER,
  OT_D  = OT_PLACEHOLDER, /* Digits except zero; same value as OT_GB */

  OT_As = 18, /* Asat */
  OT_D0 = 20, /* Digit zero */

  /* Various consonant medial types */
  OT_MH = 21,
  OT_MR = 22,
  OT_MW = 23,
  OT_MY = 24,
  OT_ML = 32,

  OT_PT = 25, /* Pwo and other tones */

  /* 26..29 are not declared here: */
  //OT_VAbv = 26,
  //OT_VBlw = 27,
  //OT_VPre = 28,
  //OT_VPst = 29,

  OT_VS = 30, /* Variation selectors */
  OT_P  = 31, /* Punctuation */
};
using myanmar_position_t = indic_position_t;
/* Classify one glyph for the Myanmar shaper.
 *
 * Unpacks the value returned by hb_indic_get_categories() (category in the
 * low byte, position in the bits above it), applies the Myanmar-specific
 * per-codepoint overrides, splits the generic matra category (OT_M) by
 * position, and writes the result through the glyph's myanmar_category() /
 * myanmar_position() accessors. */
static inline void
set_myanmar_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t cp = info.codepoint;
  const unsigned int packed = hb_indic_get_categories (cp);
  unsigned int category = packed & 0xFFu;
  myanmar_position_t position = (myanmar_position_t) (packed >> 8);

  /* Myanmar
   * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze
   */

  /* U+FE00..U+FE0F; none of these codepoints appears in the switch below. */
  if (unlikely (hb_in_range<hb_codepoint_t> (cp, 0xFE00u, 0xFE0Fu)))
    category = OT_VS;

  /* Per-codepoint overrides of the looked-up category. */
  switch (cp)
  {
    case 0x104Eu: /* The spec says C, IndicSyllableCategory doesn't have. */
    case 0xAA74u: case 0xAA75u: case 0xAA76u: /* https://github.com/harfbuzz/harfbuzz/issues/218 */
      category = OT_C;
      break;

    case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u:
    case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u:
    case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu:
    case 0x25FEu:
      category = OT_GB;
      break;

    case 0x1004u: case 0x101Bu: case 0x105Au:
      category = OT_Ra;
      break;

    case 0x1032u: case 0x1036u:
      category = OT_A;
      break;

    case 0x1039u:
      category = OT_H;
      break;

    case 0x103Au:
      category = OT_As;
      break;

    case 0x1040u: /* XXX The spec says D0, but Uniscribe doesn't seem to do. */
    case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u:
    case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u:
    case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u:
    case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u:
    case 0x1097u: case 0x1098u: case 0x1099u:
      category = OT_D;
      break;

    case 0x103Eu:
      category = OT_MH;
      break;

    case 0x1060u:
      category = OT_ML;
      break;

    case 0x103Cu:
      category = OT_MR;
      break;

    case 0x103Du: case 0x1082u:
      category = OT_MW;
      break;

    case 0x103Bu: case 0x105Eu: case 0x105Fu:
      category = OT_MY;
      break;

    case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au:
    case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu:
      category = OT_PT;
      break;

    case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u:
    case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du:
    case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu:
      category = OT_SM;
      break;

    case 0x104Au: case 0x104Bu:
      category = OT_P;
      break;

    default:
      break;
  }

  /* Split the generic matra category by position; any other position
   * leaves the category as OT_M. */
  if (category == OT_M)
  {
    switch ((int) position)
    {
      case POS_PRE_C:
	category = (myanmar_category_t) OT_VPre;
	position = POS_PRE_M;
	break;
      case POS_ABOVE_C:
	category = (myanmar_category_t) OT_VAbv;
	break;
      case POS_BELOW_C:
	category = (myanmar_category_t) OT_VBlw;
	break;
      case POS_POST_C:
	category = (myanmar_category_t) OT_VPst;
	break;
      default:
	break;
    }
  }

  info.myanmar_category() = category;
  info.myanmar_position() = position;
}
#endif /* HB_OT_SHAPER_MYANMAR_HH */

View File

@ -149,7 +149,6 @@ hb_base_sources = files(
'hb-ot-shaper-indic.hh',
'hb-ot-shaper-khmer.cc',
'hb-ot-shaper-myanmar.cc',
'hb-ot-shaper-myanmar.hh',
'hb-ot-shaper-syllabic.cc',
'hb-ot-shaper-syllabic.hh',
'hb-ot-shaper-thai.cc',