From 899ca24387d84ebeff8ad6c9adbd72cd758b3aea Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 5 Jun 2022 01:52:31 -0600 Subject: [PATCH] [myanmar] Remove duplication of categories in the Myanmar shaper --- src/Makefile.sources | 1 - src/hb-ot-shaper-myanmar-machine.hh | 50 +++++--- src/hb-ot-shaper-myanmar-machine.rl | 40 ++++--- src/hb-ot-shaper-myanmar.cc | 135 +++++++++++++++++++-- src/hb-ot-shaper-myanmar.hh | 177 ---------------------------- src/meson.build | 1 - 6 files changed, 181 insertions(+), 223 deletions(-) delete mode 100644 src/hb-ot-shaper-myanmar.hh diff --git a/src/Makefile.sources b/src/Makefile.sources index 69645c0c8..5ec46bac1 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -145,7 +145,6 @@ HB_BASE_sources = \ hb-ot-shaper-indic.hh \ hb-ot-shaper-khmer.cc \ hb-ot-shaper-myanmar.cc \ - hb-ot-shaper-myanmar.hh \ hb-ot-shaper-syllabic.cc \ hb-ot-shaper-syllabic.hh \ hb-ot-shaper-thai.cc \ diff --git a/src/hb-ot-shaper-myanmar-machine.hh b/src/hb-ot-shaper-myanmar-machine.hh index cbb9235b9..29b81c1a8 100644 --- a/src/hb-ot-shaper-myanmar-machine.hh +++ b/src/hb-ot-shaper-myanmar-machine.hh @@ -31,6 +31,18 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + +using myanmar_category_t = unsigned; +using myanmar_position_t = indic_position_t; + +#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat + enum myanmar_syllable_type_t { myanmar_consonant_syllable, myanmar_punctuation_cluster, @@ -39,7 +51,7 @@ enum myanmar_syllable_type_t { }; -#line 43 "hb-ot-shaper-myanmar-machine.hh" +#line 55 "hb-ot-shaper-myanmar-machine.hh" #define myanmar_syllable_machine_ex_A 9u #define myanmar_syllable_machine_ex_As 18u #define myanmar_syllable_machine_ex_C 1u @@ -68,7 +80,7 @@ enum myanmar_syllable_type_t { #define myanmar_syllable_machine_ex_ZWNJ 5u -#line 72 "hb-ot-shaper-myanmar-machine.hh" +#line 84 "hb-ot-shaper-myanmar-machine.hh" static const unsigned char _myanmar_syllable_machine_trans_keys[] = { 1u, 32u, 3u, 32u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, 3u, 32u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 32u, 1u, 15u, 3u, 32u, 3u, 32u, @@ -350,11 +362,11 @@ static const int myanmar_syllable_machine_error = -1; static const int myanmar_syllable_machine_en_main = 0; -#line 44 "hb-ot-shaper-myanmar-machine.rl" +#line 56 "hb-ot-shaper-myanmar-machine.rl" -#line 102 "hb-ot-shaper-myanmar-machine.rl" +#line 114 "hb-ot-shaper-myanmar-machine.rl" #define found_syllable(syllable_type) \ @@ -373,7 +385,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) int cs; hb_glyph_info_t *info = buffer->info; -#line 377 "hb-ot-shaper-myanmar-machine.hh" +#line 389 "hb-ot-shaper-myanmar-machine.hh" { cs = myanmar_syllable_machine_start; ts = 0; @@ -381,7 +393,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) act = 0; } -#line 122 "hb-ot-shaper-myanmar-machine.rl" +#line 134 "hb-ot-shaper-myanmar-machine.rl" p = 0; @@ -389,7 +401,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) unsigned int syllable_serial = 1; -#line 393 "hb-ot-shaper-myanmar-machine.hh" +#line 405 "hb-ot-shaper-myanmar-machine.hh" { int _slen; int _trans; @@ -403,7 +415,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 407 "hb-ot-shaper-myanmar-machine.hh" +#line 419 "hb-ot-shaper-myanmar-machine.hh" } _keys = _myanmar_syllable_machine_trans_keys + (cs<<1); @@ -422,38 +434,38 @@ _eof_trans: switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { case 6: -#line 94 "hb-ot-shaper-myanmar-machine.rl" +#line 106 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_consonant_syllable); }} break; case 4: -#line 95 "hb-ot-shaper-myanmar-machine.rl" +#line 107 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 10: -#line 96 "hb-ot-shaper-myanmar-machine.rl" +#line 108 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_punctuation_cluster); }} break; case 8: -#line 97 "hb-ot-shaper-myanmar-machine.rl" +#line 109 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_broken_cluster); }} break; case 3: -#line 98 "hb-ot-shaper-myanmar-machine.rl" +#line 110 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 5: -#line 94 "hb-ot-shaper-myanmar-machine.rl" +#line 106 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_consonant_syllable); }} break; case 7: -#line 97 "hb-ot-shaper-myanmar-machine.rl" +#line 109 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_broken_cluster); }} break; case 9: -#line 98 "hb-ot-shaper-myanmar-machine.rl" +#line 110 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }} break; -#line 457 "hb-ot-shaper-myanmar-machine.hh" +#line 469 "hb-ot-shaper-myanmar-machine.hh" } _again: @@ -462,7 +474,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 466 "hb-ot-shaper-myanmar-machine.hh" +#line 478 "hb-ot-shaper-myanmar-machine.hh" } if ( ++p != pe ) @@ -478,7 +490,7 @@ _again: } -#line 130 "hb-ot-shaper-myanmar-machine.rl" +#line 142 "hb-ot-shaper-myanmar-machine.rl" } diff --git a/src/hb-ot-shaper-myanmar-machine.rl b/src/hb-ot-shaper-myanmar-machine.rl index 57f0f16ec..ecabb7062 100644 --- a/src/hb-ot-shaper-myanmar-machine.rl +++ b/src/hb-ot-shaper-myanmar-machine.rl @@ -29,6 +29,18 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + +using myanmar_category_t = unsigned; +using myanmar_position_t = indic_position_t; + +#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat + enum myanmar_syllable_type_t { myanmar_consonant_syllable, myanmar_punctuation_cluster, @@ -46,30 +58,30 @@ enum myanmar_syllable_type_t { %%{ export A = 9; -export As = 18; +export As = 18; # Asat export C = 1; -export D = 10; -export D0 = 20; -export DB = 3; -export GB = 10; +export D = 10; # Digits except zero = GB +export D0 = 20; # Digit zero +export DB = 3; # Dot below = OT_N +export GB = 10; # = OT_PLACEHOLDER export H = 4; export IV = 2; -export MH = 21; -export ML = 32; -export MR = 22; -export MW = 23; -export MY = 24; -export PT = 25; -export V = 8; +export MH = 21; # Medial +export MR = 22; # Medial +export MW = 23; # Medial +export MY = 24; # Medial +export ML = 32; # Consonant medials +export PT = 25; # Pwo and other tones +export V = 8; # Visarga and Shan tones export VAbv = 26; export VBlw = 27; export VPre = 28; export VPst = 29; -export VS = 30; +export VS = 30; # Variation selectors export ZWJ = 6; export ZWNJ = 5; export Ra = 15; -export P = 31; +export P = 31; # Punctuation export CS = 19; j = ZWJ|ZWNJ; # Joiners diff --git a/src/hb-ot-shaper-myanmar.cc b/src/hb-ot-shaper-myanmar.cc index ecb4cf1ab..af037e9ec 100644 --- a/src/hb-ot-shaper-myanmar.cc +++ b/src/hb-ot-shaper-myanmar.cc @@ -28,8 +28,9 @@ #ifndef HB_NO_OT_SHAPE -#include "hb-ot-shaper-myanmar.hh" #include "hb-ot-shaper-myanmar-machine.hh" +#include "hb-ot-shaper-indic.hh" +#include "hb-ot-layout.hh" /* @@ -62,6 +63,118 @@ myanmar_other_features[] = HB_TAG('p','s','t','s'), }; +static inline void +set_myanmar_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + unsigned int cat = type & 0xFFu; + myanmar_position_t pos = (myanmar_position_t) (type >> 8); + + /* Myanmar + * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze + */ + if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu))) + cat = M_Cat(VS); + + switch (u) + { + case 0x104Eu: + cat = M_Cat(C); /* The spec says C, IndicSyllableCategory doesn't have. */ + break; + + case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: + case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: + case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: + case 0x25FEu: + cat = M_Cat(GB); + break; + + case 0x1004u: case 0x101Bu: case 0x105Au: + cat = M_Cat(Ra); + break; + + case 0x1032u: case 0x1036u: + cat = M_Cat(A); + break; + + case 0x1039u: + cat = M_Cat(H); + break; + + case 0x103Au: + cat = M_Cat(As); + break; + + case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: + case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: + case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: + case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: + case 0x1097u: case 0x1098u: case 0x1099u: + cat = M_Cat(D); + break; + + case 0x1040u: + cat = M_Cat(D); /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ + break; + + case 0x103Eu: + cat = M_Cat(MH); + break; + + case 0x1060u: + cat = M_Cat(ML); + break; + + case 0x103Cu: + cat = M_Cat(MR); + break; + + case 0x103Du: case 0x1082u: + cat = M_Cat(MW); + break; + + case 0x103Bu: case 0x105Eu: case 0x105Fu: + cat = M_Cat(MY); + break; + + case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: + case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: + cat = M_Cat(PT); + break; + + case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: + case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: + case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: + cat = M_Cat(V); + break; + + case 0x104Au: case 0x104Bu: + cat = M_Cat(P); + break; + + case 0xAA74u: case 0xAA75u: case 0xAA76u: + /* https://github.com/harfbuzz/harfbuzz/issues/218 */ + cat = M_Cat(C); + break; + } + + if (cat == OT_M) + { + switch ((int) pos) + { + case POS_PRE_C: cat = (myanmar_category_t) M_Cat(VPre); + pos = POS_PRE_M; break; + case POS_ABOVE_C: cat = (myanmar_category_t) M_Cat(VAbv); break; + case POS_BELOW_C: cat = (myanmar_category_t) M_Cat(VBlw); break; + case POS_POST_C: cat = (myanmar_category_t) M_Cat(VPst); break; + } + } + + info.myanmar_category() = cat; + info.myanmar_position() = pos; +} + static void setup_syllables_myanmar (const hb_ot_shape_plan_t *plan, hb_font_t *font, @@ -150,9 +263,9 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, { unsigned int limit = start; if (start + 3 <= end && - info[start ].myanmar_category() == OT_Ra && - info[start+1].myanmar_category() == OT_As && - info[start+2].myanmar_category() == OT_H) + info[start ].myanmar_category() == M_Cat(Ra) && + info[start+1].myanmar_category() == M_Cat(As) && + info[start+2].myanmar_category() == M_Cat(H)) { limit += 3; base = start; @@ -189,7 +302,7 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, * Myanmar reordering! */ for (; i < end; i++) { - if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */ + if (info[i].myanmar_category() == M_Cat(MR)) /* Pre-base reordering */ { info[i].myanmar_position() = POS_PRE_C; continue; @@ -198,30 +311,30 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, { continue; } - if (info[i].myanmar_category() == OT_VS) + if (info[i].myanmar_category() == M_Cat(VS)) { info[i].myanmar_position() = info[i - 1].myanmar_position(); continue; } - if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw) + if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == M_Cat(VBlw)) { pos = POS_BELOW_C; info[i].myanmar_position() = pos; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A) + if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(A)) { info[i].myanmar_position() = POS_BEFORE_SUB; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw) + if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(VBlw)) { info[i].myanmar_position() = pos; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A) + if (pos == POS_BELOW_C && info[i].myanmar_category() != M_Cat(A)) { pos = POS_AFTER_SUB; info[i].myanmar_position() = pos; @@ -264,7 +377,7 @@ reorder_myanmar (const hb_ot_shape_plan_t *plan, { hb_syllabic_insert_dotted_circles (font, buffer, myanmar_broken_cluster, - OT_GB); + M_Cat(GB)); foreach_syllable (buffer, start, end) reorder_syllable_myanmar (plan, font->face, buffer, start, end); diff --git a/src/hb-ot-shaper-myanmar.hh b/src/hb-ot-shaper-myanmar.hh deleted file mode 100644 index 212e290a3..000000000 --- a/src/hb-ot-shaper-myanmar.hh +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright © 2018 Google, Inc. - * - * This is part of HarfBuzz, a text shaping library. - * - * Permission is hereby granted, without written agreement and without - * license or royalty fees, to use, copy, modify, and distribute this - * software and its documentation for any purpose, provided that the - * above copyright notice and the following two paragraphs appear in - * all copies of this software. - * - * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN - * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - * - * Google Author(s): Behdad Esfahbod - */ - -#ifndef HB_OT_SHAPER_MYANMAR_HH -#define HB_OT_SHAPER_MYANMAR_HH - -#include "hb.hh" - -#include "hb-ot-shaper-indic.hh" - - -/* buffer var allocations */ -#define myanmar_category() indic_category() /* myanmar_category_t */ -#define myanmar_position() indic_position() /* myanmar_position_t */ - - -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. */ -enum myanmar_category_t { - OT_As = 18, /* Asat */ - OT_D0 = 20, /* Digit zero */ - OT_DB = OT_N, /* Dot below */ - OT_GB = OT_PLACEHOLDER, - OT_MH = 21, /* Various consonant medial types */ - OT_MR = 22, /* Various consonant medial types */ - OT_MW = 23, /* Various consonant medial types */ - OT_MY = 24, /* Various consonant medial types */ - OT_PT = 25, /* Pwo and other tones */ - //OT_VAbv = 26, - //OT_VBlw = 27, - //OT_VPre = 28, - //OT_VPst = 29, - OT_VS = 30, /* Variation selectors */ - OT_P = 31, /* Punctuation */ - OT_D = OT_GB, /* Digits except zero */ - OT_ML = 32, /* Various consonant medial types */ -}; - -using myanmar_position_t = indic_position_t; - -static inline void -set_myanmar_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - unsigned int cat = type & 0xFFu; - myanmar_position_t pos = (myanmar_position_t) (type >> 8); - - /* Myanmar - * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze - */ - if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu))) - cat = OT_VS; - - switch (u) - { - case 0x104Eu: - cat = OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ - break; - - case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: - case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: - case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: - case 0x25FEu: - cat = OT_GB; - break; - - case 0x1004u: case 0x101Bu: case 0x105Au: - cat = OT_Ra; - break; - - case 0x1032u: case 0x1036u: - cat = OT_A; - break; - - case 0x1039u: - cat = OT_H; - break; - - case 0x103Au: - cat = OT_As; - break; - - case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: - case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: - case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: - case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: - case 0x1097u: case 0x1098u: case 0x1099u: - cat = OT_D; - break; - - case 0x1040u: - cat = OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ - break; - - case 0x103Eu: - cat = OT_MH; - break; - - case 0x1060u: - cat = OT_ML; - break; - - case 0x103Cu: - cat = OT_MR; - break; - - case 0x103Du: case 0x1082u: - cat = OT_MW; - break; - - case 0x103Bu: case 0x105Eu: case 0x105Fu: - cat = OT_MY; - break; - - case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: - case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: - cat = OT_PT; - break; - - case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: - case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: - case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: - cat = OT_SM; - break; - - case 0x104Au: case 0x104Bu: - cat = OT_P; - break; - - case 0xAA74u: case 0xAA75u: case 0xAA76u: - /* https://github.com/harfbuzz/harfbuzz/issues/218 */ - cat = OT_C; - break; - } - - if (cat == OT_M) - { - switch ((int) pos) - { - case POS_PRE_C: cat = (myanmar_category_t) OT_VPre; - pos = POS_PRE_M; break; - case POS_ABOVE_C: cat = (myanmar_category_t) OT_VAbv; break; - case POS_BELOW_C: cat = (myanmar_category_t) OT_VBlw; break; - case POS_POST_C: cat = (myanmar_category_t) OT_VPst; break; - } - } - - info.myanmar_category() = cat; - info.myanmar_position() = pos; -} - - -#endif /* HB_OT_SHAPER_MYANMAR_HH */ diff --git a/src/meson.build b/src/meson.build index 29877698f..bca289dc5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -149,7 +149,6 @@ hb_base_sources = files( 'hb-ot-shaper-indic.hh', 'hb-ot-shaper-khmer.cc', 'hb-ot-shaper-myanmar.cc', - 'hb-ot-shaper-myanmar.hh', 'hb-ot-shaper-syllabic.cc', 'hb-ot-shaper-syllabic.hh', 'hb-ot-shaper-thai.cc',