From 8533214ac567145cfcdc54f59ec58b8ad0b749b6 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 11 Jun 2022 08:49:36 -0600 Subject: [PATCH] [khmer] Fold category Coeng completely into category H --- src/gen-indic-table.py | 4 +--- src/hb-ot-shaper-indic-machine.hh | 36 ++++++++++++++--------------- src/hb-ot-shaper-indic-table.cc | 9 +++----- src/hb-ot-shaper-khmer-machine.hh | 2 +- src/hb-ot-shaper-khmer-machine.rl | 8 ++++--- src/hb-ot-shaper-khmer.cc | 2 +- src/hb-ot-shaper-myanmar-machine.hh | 22 +++++++++--------- 7 files changed, 40 insertions(+), 43 deletions(-) diff --git a/src/gen-indic-table.py b/src/gen-indic-table.py index 7aeb998d6..d0f8ad7fd 100755 --- a/src/gen-indic-table.py +++ b/src/gen-indic-table.py @@ -110,7 +110,6 @@ categories = { 'VPre', 'VPst', - 'Coeng', 'Robatic', 'Xgroup', 'Ygroup', @@ -158,7 +157,7 @@ category_map = { 'Consonant_Succeeding_Repha' : 'CM', 'Consonant_With_Stacker' : 'CS', 'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552 - 'Invisible_Stacker' : 'Coeng', + 'Invisible_Stacker' : 'H', 'Joiner' : 'ZWJ', 'Modifying_Letter' : 'X', 'Non_Joiner' : 'ZWNJ', @@ -535,7 +534,6 @@ print () # Shorten values short = [{ "Repha": 'Rf', - "Coeng": 'Co', "PLACEHOLDER": 'GB', "DOTTEDCIRCLE": 'DC', "VPst": 'VR', diff --git a/src/hb-ot-shaper-indic-machine.hh b/src/hb-ot-shaper-indic-machine.hh index d4fee51ab..9e36bc144 100644 --- a/src/hb-ot-shaper-indic-machine.hh +++ b/src/hb-ot-shaper-indic-machine.hh @@ -418,7 +418,7 @@ static const int indic_syllable_machine_en_main = 39; -#line 120 "hb-ot-shaper-indic-machine.rl" +#line 118 "hb-ot-shaper-indic-machine.rl" #define found_syllable(syllable_type) \ @@ -445,7 +445,7 @@ find_syllables_indic (hb_buffer_t *buffer) act = 0; } -#line 140 "hb-ot-shaper-indic-machine.rl" +#line 138 "hb-ot-shaper-indic-machine.rl" p = 0; @@ -490,51 +490,51 @@ _eof_trans: {te = p+1;} break; case 11: -#line 116 "hb-ot-shaper-indic-machine.rl" +#line 114 "hb-ot-shaper-indic-machine.rl" {te = p+1;{ found_syllable (indic_non_indic_cluster); }} break; case 13: -#line 111 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_consonant_syllable); }} break; case 14: -#line 112 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_vowel_syllable); }} break; case 17: -#line 113 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_standalone_cluster); }} break; case 19: -#line 114 "hb-ot-shaper-indic-machine.rl" +#line 112 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_symbol_cluster); }} break; case 15: -#line 115 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 16: -#line 116 "hb-ot-shaper-indic-machine.rl" +#line 114 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_non_indic_cluster); }} break; case 1: -#line 111 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }} break; case 3: -#line 112 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }} break; case 7: -#line 113 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }} break; case 8: -#line 114 "hb-ot-shaper-indic-machine.rl" +#line 112 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }} break; case 4: -#line 115 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 6: @@ -555,19 +555,19 @@ _eof_trans: case 18: #line 1 "NONE" {te = p+1;} -#line 111 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {act = 1;} break; case 5: #line 1 "NONE" {te = p+1;} -#line 115 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {act = 5;} break; case 12: #line 1 "NONE" {te = p+1;} -#line 116 "hb-ot-shaper-indic-machine.rl" +#line 114 "hb-ot-shaper-indic-machine.rl" {act = 6;} break; #line 574 "hb-ot-shaper-indic-machine.hh" @@ -595,7 +595,7 @@ _again: } -#line 148 "hb-ot-shaper-indic-machine.rl" +#line 146 "hb-ot-shaper-indic-machine.rl" } diff --git a/src/hb-ot-shaper-indic-table.cc b/src/hb-ot-shaper-indic-table.cc index 2c3a1ecad..fa2fa4413 100644 --- a/src/hb-ot-shaper-indic-table.cc +++ b/src/hb-ot-shaper-indic-table.cc @@ -52,7 +52,6 @@ #define OT_VBlw K_Cat(VBlw) #define OT_VPre K_Cat(VPre) #define OT_VPst K_Cat(VPst) -#define OT_Coeng K_Cat(Coeng) #define OT_Robatic K_Cat(Robatic) #define OT_Xgroup K_Cat(Xgroup) #define OT_Ygroup K_Cat(Ygroup) @@ -82,9 +81,8 @@ static_assert (OT_VPst == M_Cat(VPst), ""); #define _OT_C OT_C /* 518 chars; C */ #define _OT_CM OT_CM /* 1 chars; CM */ #define _OT_CS OT_CS /* 2 chars; CS */ -#define _OT_Co OT_Coeng /* 2 chars; Coeng */ #define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */ -#define _OT_H OT_H /* 10 chars; H */ +#define _OT_H OT_H /* 12 chars; H */ #define _OT_M OT_M /* 160 chars; M */ #define _OT_MH OT_MH /* 1 chars; MH */ #define _OT_ML OT_ML /* 1 chars; ML */ @@ -359,7 +357,7 @@ static const uint16_t indic_table[] = { /* 1020 */ _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), /* 1028 */ _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), _(VA,T), _(VA,T), _(VB,B), /* 1030 */ _(VB,B), _(VL,L), _(A,SM), _(VA,T), _(VA,T), _(VA,T), _(A,SM), _(N,X), - /* 1038 */_(SM,SM), _(Co,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C), + /* 1038 */_(SM,SM), _(H,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C), /* 1040 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 1048 */ _(GB,C), _(GB,C), _(P,X), _(P,X), _(X,X), _(X,X), _(C,C), _(X,X), /* 1050 */ _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), @@ -388,7 +386,7 @@ static const uint16_t indic_table[] = { /* 17B8 */ _(VA,T), _(VA,T), _(VA,T), _(VB,B), _(VB,B), _(VB,B), _(VA,T), _(VR,R), /* 17C0 */ _(VR,R), _(VL,L), _(VL,L), _(VL,L), _(VR,R), _(VR,R), _(Xg,X), _(Yg,X), /* 17C8 */ _(Yg,X), _(Rt,X), _(Rt,X), _(Xg,X), _(Rt,X), _(Xg,X), _(Xg,X), _(Xg,X), - /* 17D0 */ _(Xg,X), _(Xg,X), _(Co,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 17D0 */ _(Xg,X), _(Xg,X), _(H,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 17D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(S,SM), _(Yg,X), _(X,X), _(X,X), /* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), @@ -544,7 +542,6 @@ hb_indic_get_categories (hb_codepoint_t u) #undef _OT_C #undef _OT_CM #undef _OT_CS -#undef _OT_Co #undef _OT_DC #undef _OT_H #undef _OT_M diff --git a/src/hb-ot-shaper-khmer-machine.hh b/src/hb-ot-shaper-khmer-machine.hh index e7fa71a83..43594c2c6 100644 --- a/src/hb-ot-shaper-khmer-machine.hh +++ b/src/hb-ot-shaper-khmer-machine.hh @@ -50,8 +50,8 @@ enum khmer_syllable_type_t { #line 52 "hb-ot-shaper-khmer-machine.hh" #define khmer_syllable_machine_ex_C 1u -#define khmer_syllable_machine_ex_Coeng 4u #define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u +#define khmer_syllable_machine_ex_H 4u #define khmer_syllable_machine_ex_PLACEHOLDER 10u #define khmer_syllable_machine_ex_Ra 15u #define khmer_syllable_machine_ex_Robatic 25u diff --git a/src/hb-ot-shaper-khmer-machine.rl b/src/hb-ot-shaper-khmer-machine.rl index 4a28b0448..2018be58c 100644 --- a/src/hb-ot-shaper-khmer-machine.rl +++ b/src/hb-ot-shaper-khmer-machine.rl @@ -55,8 +55,11 @@ enum khmer_syllable_type_t { %%{ +# We use category H for spec category Coeng + export C = 1; export V = 2; +export H = 4; export ZWNJ = 5; export ZWJ = 6; export PLACEHOLDER = 10; @@ -68,7 +71,6 @@ export VBlw = 21; export VPre = 22; export VPst = 23; -export Coeng = 4; export Robatic = 25; export Xgroup = 26; export Ygroup = 27; @@ -83,10 +85,10 @@ ygroup = Ygroup*; # This grammar was experimentally extracted from what Uniscribe allows. matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?; -syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup; +syllable_tail = xgroup matra_group xgroup (H.c)? ygroup; -broken_cluster = (Coeng.cn)* (Coeng | syllable_tail); +broken_cluster = (H.cn)* (H | syllable_tail); consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster; other = any; diff --git a/src/hb-ot-shaper-khmer.cc b/src/hb-ot-shaper-khmer.cc index f19a234bf..e04d63319 100644 --- a/src/hb-ot-shaper-khmer.cc +++ b/src/hb-ot-shaper-khmer.cc @@ -241,7 +241,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, * the 'pref' OpenType feature applied to them. * """ */ - if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end) + if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end) { num_coengs++; diff --git a/src/hb-ot-shaper-myanmar-machine.hh b/src/hb-ot-shaper-myanmar-machine.hh index 42acadbb7..281e8452d 100644 --- a/src/hb-ot-shaper-myanmar-machine.hh +++ b/src/hb-ot-shaper-myanmar-machine.hh @@ -435,7 +435,7 @@ static const int myanmar_syllable_machine_en_main = 0; -#line 121 "hb-ot-shaper-myanmar-machine.rl" +#line 120 "hb-ot-shaper-myanmar-machine.rl" #define found_syllable(syllable_type) \ @@ -462,7 +462,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) act = 0; } -#line 141 "hb-ot-shaper-myanmar-machine.rl" +#line 140 "hb-ot-shaper-myanmar-machine.rl" p = 0; @@ -503,35 +503,35 @@ _eof_trans: switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { case 6: -#line 113 "hb-ot-shaper-myanmar-machine.rl" +#line 112 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_consonant_syllable); }} break; case 4: -#line 114 "hb-ot-shaper-myanmar-machine.rl" +#line 113 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 10: -#line 115 "hb-ot-shaper-myanmar-machine.rl" +#line 114 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_punctuation_cluster); }} break; case 8: -#line 116 "hb-ot-shaper-myanmar-machine.rl" +#line 115 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 3: -#line 117 "hb-ot-shaper-myanmar-machine.rl" +#line 116 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 5: -#line 113 "hb-ot-shaper-myanmar-machine.rl" +#line 112 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_consonant_syllable); }} break; case 7: -#line 116 "hb-ot-shaper-myanmar-machine.rl" +#line 115 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 9: -#line 117 "hb-ot-shaper-myanmar-machine.rl" +#line 116 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }} break; #line 538 "hb-ot-shaper-myanmar-machine.hh" @@ -559,7 +559,7 @@ _again: } -#line 149 "hb-ot-shaper-myanmar-machine.rl" +#line 148 "hb-ot-shaper-myanmar-machine.rl" }