[khmer] Fold category Coeng completely into category H

This commit is contained in:
Behdad Esfahbod 2022-06-11 08:49:36 -06:00
parent 607a9fe793
commit 8533214ac5
7 changed files with 40 additions and 43 deletions

View File

@ -110,7 +110,6 @@ categories = {
'VPre', 'VPre',
'VPst', 'VPst',
'Coeng',
'Robatic', 'Robatic',
'Xgroup', 'Xgroup',
'Ygroup', 'Ygroup',
@ -158,7 +157,7 @@ category_map = {
'Consonant_Succeeding_Repha' : 'CM', 'Consonant_Succeeding_Repha' : 'CM',
'Consonant_With_Stacker' : 'CS', 'Consonant_With_Stacker' : 'CS',
'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552 'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
'Invisible_Stacker' : 'Coeng', 'Invisible_Stacker' : 'H',
'Joiner' : 'ZWJ', 'Joiner' : 'ZWJ',
'Modifying_Letter' : 'X', 'Modifying_Letter' : 'X',
'Non_Joiner' : 'ZWNJ', 'Non_Joiner' : 'ZWNJ',
@ -535,7 +534,6 @@ print ()
# Shorten values # Shorten values
short = [{ short = [{
"Repha": 'Rf', "Repha": 'Rf',
"Coeng": 'Co',
"PLACEHOLDER": 'GB', "PLACEHOLDER": 'GB',
"DOTTEDCIRCLE": 'DC', "DOTTEDCIRCLE": 'DC',
"VPst": 'VR', "VPst": 'VR',

View File

@ -418,7 +418,7 @@ static const int indic_syllable_machine_en_main = 39;
#line 120 "hb-ot-shaper-indic-machine.rl" #line 118 "hb-ot-shaper-indic-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -445,7 +445,7 @@ find_syllables_indic (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 140 "hb-ot-shaper-indic-machine.rl" #line 138 "hb-ot-shaper-indic-machine.rl"
p = 0; p = 0;
@ -490,51 +490,51 @@ _eof_trans:
{te = p+1;} {te = p+1;}
break; break;
case 11: case 11:
#line 116 "hb-ot-shaper-indic-machine.rl" #line 114 "hb-ot-shaper-indic-machine.rl"
{te = p+1;{ found_syllable (indic_non_indic_cluster); }} {te = p+1;{ found_syllable (indic_non_indic_cluster); }}
break; break;
case 13: case 13:
#line 111 "hb-ot-shaper-indic-machine.rl" #line 109 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_consonant_syllable); }} {te = p;p--;{ found_syllable (indic_consonant_syllable); }}
break; break;
case 14: case 14:
#line 112 "hb-ot-shaper-indic-machine.rl" #line 110 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_vowel_syllable); }} {te = p;p--;{ found_syllable (indic_vowel_syllable); }}
break; break;
case 17: case 17:
#line 113 "hb-ot-shaper-indic-machine.rl" #line 111 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_standalone_cluster); }} {te = p;p--;{ found_syllable (indic_standalone_cluster); }}
break; break;
case 19: case 19:
#line 114 "hb-ot-shaper-indic-machine.rl" #line 112 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_symbol_cluster); }} {te = p;p--;{ found_syllable (indic_symbol_cluster); }}
break; break;
case 15: case 15:
#line 115 "hb-ot-shaper-indic-machine.rl" #line 113 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} {te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break; break;
case 16: case 16:
#line 116 "hb-ot-shaper-indic-machine.rl" #line 114 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_non_indic_cluster); }} {te = p;p--;{ found_syllable (indic_non_indic_cluster); }}
break; break;
case 1: case 1:
#line 111 "hb-ot-shaper-indic-machine.rl" #line 109 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }} {{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }}
break; break;
case 3: case 3:
#line 112 "hb-ot-shaper-indic-machine.rl" #line 110 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }} {{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }}
break; break;
case 7: case 7:
#line 113 "hb-ot-shaper-indic-machine.rl" #line 111 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }} {{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }}
break; break;
case 8: case 8:
#line 114 "hb-ot-shaper-indic-machine.rl" #line 112 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }} {{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }}
break; break;
case 4: case 4:
#line 115 "hb-ot-shaper-indic-machine.rl" #line 113 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} {{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break; break;
case 6: case 6:
@ -555,19 +555,19 @@ _eof_trans:
case 18: case 18:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 111 "hb-ot-shaper-indic-machine.rl" #line 109 "hb-ot-shaper-indic-machine.rl"
{act = 1;} {act = 1;}
break; break;
case 5: case 5:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 115 "hb-ot-shaper-indic-machine.rl" #line 113 "hb-ot-shaper-indic-machine.rl"
{act = 5;} {act = 5;}
break; break;
case 12: case 12:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 116 "hb-ot-shaper-indic-machine.rl" #line 114 "hb-ot-shaper-indic-machine.rl"
{act = 6;} {act = 6;}
break; break;
#line 574 "hb-ot-shaper-indic-machine.hh" #line 574 "hb-ot-shaper-indic-machine.hh"
@ -595,7 +595,7 @@ _again:
} }
#line 148 "hb-ot-shaper-indic-machine.rl" #line 146 "hb-ot-shaper-indic-machine.rl"
} }

View File

@ -52,7 +52,6 @@
#define OT_VBlw K_Cat(VBlw) #define OT_VBlw K_Cat(VBlw)
#define OT_VPre K_Cat(VPre) #define OT_VPre K_Cat(VPre)
#define OT_VPst K_Cat(VPst) #define OT_VPst K_Cat(VPst)
#define OT_Coeng K_Cat(Coeng)
#define OT_Robatic K_Cat(Robatic) #define OT_Robatic K_Cat(Robatic)
#define OT_Xgroup K_Cat(Xgroup) #define OT_Xgroup K_Cat(Xgroup)
#define OT_Ygroup K_Cat(Ygroup) #define OT_Ygroup K_Cat(Ygroup)
@ -82,9 +81,8 @@ static_assert (OT_VPst == M_Cat(VPst), "");
#define _OT_C OT_C /* 518 chars; C */ #define _OT_C OT_C /* 518 chars; C */
#define _OT_CM OT_CM /* 1 chars; CM */ #define _OT_CM OT_CM /* 1 chars; CM */
#define _OT_CS OT_CS /* 2 chars; CS */ #define _OT_CS OT_CS /* 2 chars; CS */
#define _OT_Co OT_Coeng /* 2 chars; Coeng */
#define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */ #define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */
#define _OT_H OT_H /* 10 chars; H */ #define _OT_H OT_H /* 12 chars; H */
#define _OT_M OT_M /* 160 chars; M */ #define _OT_M OT_M /* 160 chars; M */
#define _OT_MH OT_MH /* 1 chars; MH */ #define _OT_MH OT_MH /* 1 chars; MH */
#define _OT_ML OT_ML /* 1 chars; ML */ #define _OT_ML OT_ML /* 1 chars; ML */
@ -359,7 +357,7 @@ static const uint16_t indic_table[] = {
/* 1020 */ _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), /* 1020 */ _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C),
/* 1028 */ _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), _(VA,T), _(VA,T), _(VB,B), /* 1028 */ _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), _(VA,T), _(VA,T), _(VB,B),
/* 1030 */ _(VB,B), _(VL,L), _(A,SM), _(VA,T), _(VA,T), _(VA,T), _(A,SM), _(N,X), /* 1030 */ _(VB,B), _(VL,L), _(A,SM), _(VA,T), _(VA,T), _(VA,T), _(A,SM), _(N,X),
/* 1038 */_(SM,SM), _(Co,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C), /* 1038 */_(SM,SM), _(H,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C),
/* 1040 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 1040 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 1048 */ _(GB,C), _(GB,C), _(P,X), _(P,X), _(X,X), _(X,X), _(C,C), _(X,X), /* 1048 */ _(GB,C), _(GB,C), _(P,X), _(P,X), _(X,X), _(X,X), _(C,C), _(X,X),
/* 1050 */ _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), /* 1050 */ _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R),
@ -388,7 +386,7 @@ static const uint16_t indic_table[] = {
/* 17B8 */ _(VA,T), _(VA,T), _(VA,T), _(VB,B), _(VB,B), _(VB,B), _(VA,T), _(VR,R), /* 17B8 */ _(VA,T), _(VA,T), _(VA,T), _(VB,B), _(VB,B), _(VB,B), _(VA,T), _(VR,R),
/* 17C0 */ _(VR,R), _(VL,L), _(VL,L), _(VL,L), _(VR,R), _(VR,R), _(Xg,X), _(Yg,X), /* 17C0 */ _(VR,R), _(VL,L), _(VL,L), _(VL,L), _(VR,R), _(VR,R), _(Xg,X), _(Yg,X),
/* 17C8 */ _(Yg,X), _(Rt,X), _(Rt,X), _(Xg,X), _(Rt,X), _(Xg,X), _(Xg,X), _(Xg,X), /* 17C8 */ _(Yg,X), _(Rt,X), _(Rt,X), _(Xg,X), _(Rt,X), _(Xg,X), _(Xg,X), _(Xg,X),
/* 17D0 */ _(Xg,X), _(Xg,X), _(Co,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 17D0 */ _(Xg,X), _(Xg,X), _(H,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 17D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(S,SM), _(Yg,X), _(X,X), _(X,X), /* 17D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(S,SM), _(Yg,X), _(X,X), _(X,X),
/* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
@ -544,7 +542,6 @@ hb_indic_get_categories (hb_codepoint_t u)
#undef _OT_C #undef _OT_C
#undef _OT_CM #undef _OT_CM
#undef _OT_CS #undef _OT_CS
#undef _OT_Co
#undef _OT_DC #undef _OT_DC
#undef _OT_H #undef _OT_H
#undef _OT_M #undef _OT_M

View File

@ -50,8 +50,8 @@ enum khmer_syllable_type_t {
#line 52 "hb-ot-shaper-khmer-machine.hh" #line 52 "hb-ot-shaper-khmer-machine.hh"
#define khmer_syllable_machine_ex_C 1u #define khmer_syllable_machine_ex_C 1u
#define khmer_syllable_machine_ex_Coeng 4u
#define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u #define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u
#define khmer_syllable_machine_ex_H 4u
#define khmer_syllable_machine_ex_PLACEHOLDER 10u #define khmer_syllable_machine_ex_PLACEHOLDER 10u
#define khmer_syllable_machine_ex_Ra 15u #define khmer_syllable_machine_ex_Ra 15u
#define khmer_syllable_machine_ex_Robatic 25u #define khmer_syllable_machine_ex_Robatic 25u

View File

@ -55,8 +55,11 @@ enum khmer_syllable_type_t {
%%{ %%{
# We use category H for spec category Coeng
export C = 1; export C = 1;
export V = 2; export V = 2;
export H = 4;
export ZWNJ = 5; export ZWNJ = 5;
export ZWJ = 6; export ZWJ = 6;
export PLACEHOLDER = 10; export PLACEHOLDER = 10;
@ -68,7 +71,6 @@ export VBlw = 21;
export VPre = 22; export VPre = 22;
export VPst = 23; export VPst = 23;
export Coeng = 4;
export Robatic = 25; export Robatic = 25;
export Xgroup = 26; export Xgroup = 26;
export Ygroup = 27; export Ygroup = 27;
@ -83,10 +85,10 @@ ygroup = Ygroup*;
# This grammar was experimentally extracted from what Uniscribe allows. # This grammar was experimentally extracted from what Uniscribe allows.
matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?; matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup; syllable_tail = xgroup matra_group xgroup (H.c)? ygroup;
broken_cluster = (Coeng.cn)* (Coeng | syllable_tail); broken_cluster = (H.cn)* (H | syllable_tail);
consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster; consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
other = any; other = any;

View File

@ -241,7 +241,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
* the 'pref' OpenType feature applied to them. * the 'pref' OpenType feature applied to them.
* """ * """
*/ */
if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end) if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end)
{ {
num_coengs++; num_coengs++;

View File

@ -435,7 +435,7 @@ static const int myanmar_syllable_machine_en_main = 0;
#line 121 "hb-ot-shaper-myanmar-machine.rl" #line 120 "hb-ot-shaper-myanmar-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -462,7 +462,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 141 "hb-ot-shaper-myanmar-machine.rl" #line 140 "hb-ot-shaper-myanmar-machine.rl"
p = 0; p = 0;
@ -503,35 +503,35 @@ _eof_trans:
switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { switch ( _myanmar_syllable_machine_trans_actions[_trans] ) {
case 6: case 6:
#line 113 "hb-ot-shaper-myanmar-machine.rl" #line 112 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_consonant_syllable); }} {te = p+1;{ found_syllable (myanmar_consonant_syllable); }}
break; break;
case 4: case 4:
#line 114 "hb-ot-shaper-myanmar-machine.rl" #line 113 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break; break;
case 10: case 10:
#line 115 "hb-ot-shaper-myanmar-machine.rl" #line 114 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_punctuation_cluster); }} {te = p+1;{ found_syllable (myanmar_punctuation_cluster); }}
break; break;
case 8: case 8:
#line 116 "hb-ot-shaper-myanmar-machine.rl" #line 115 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} {te = p+1;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break; break;
case 3: case 3:
#line 117 "hb-ot-shaper-myanmar-machine.rl" #line 116 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break; break;
case 5: case 5:
#line 113 "hb-ot-shaper-myanmar-machine.rl" #line 112 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_consonant_syllable); }} {te = p;p--;{ found_syllable (myanmar_consonant_syllable); }}
break; break;
case 7: case 7:
#line 116 "hb-ot-shaper-myanmar-machine.rl" #line 115 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} {te = p;p--;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break; break;
case 9: case 9:
#line 117 "hb-ot-shaper-myanmar-machine.rl" #line 116 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }} {te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }}
break; break;
#line 538 "hb-ot-shaper-myanmar-machine.hh" #line 538 "hb-ot-shaper-myanmar-machine.hh"
@ -559,7 +559,7 @@ _again:
} }
#line 149 "hb-ot-shaper-myanmar-machine.rl" #line 148 "hb-ot-shaper-myanmar-machine.rl"
} }