[khmer] Fold category Coeng completely into category H

This commit is contained in:
Behdad Esfahbod 2022-06-11 08:49:36 -06:00
parent 607a9fe793
commit 8533214ac5
7 changed files with 40 additions and 43 deletions

View File

@ -110,7 +110,6 @@ categories = {
'VPre',
'VPst',
'Coeng',
'Robatic',
'Xgroup',
'Ygroup',
@ -158,7 +157,7 @@ category_map = {
'Consonant_Succeeding_Repha' : 'CM',
'Consonant_With_Stacker' : 'CS',
'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552
'Invisible_Stacker' : 'Coeng',
'Invisible_Stacker' : 'H',
'Joiner' : 'ZWJ',
'Modifying_Letter' : 'X',
'Non_Joiner' : 'ZWNJ',
@ -535,7 +534,6 @@ print ()
# Shorten values
short = [{
"Repha": 'Rf',
"Coeng": 'Co',
"PLACEHOLDER": 'GB',
"DOTTEDCIRCLE": 'DC',
"VPst": 'VR',

View File

@ -418,7 +418,7 @@ static const int indic_syllable_machine_en_main = 39;
#line 120 "hb-ot-shaper-indic-machine.rl"
#line 118 "hb-ot-shaper-indic-machine.rl"
#define found_syllable(syllable_type) \
@ -445,7 +445,7 @@ find_syllables_indic (hb_buffer_t *buffer)
act = 0;
}
#line 140 "hb-ot-shaper-indic-machine.rl"
#line 138 "hb-ot-shaper-indic-machine.rl"
p = 0;
@ -490,51 +490,51 @@ _eof_trans:
{te = p+1;}
break;
case 11:
#line 116 "hb-ot-shaper-indic-machine.rl"
#line 114 "hb-ot-shaper-indic-machine.rl"
{te = p+1;{ found_syllable (indic_non_indic_cluster); }}
break;
case 13:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 109 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_consonant_syllable); }}
break;
case 14:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 110 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_vowel_syllable); }}
break;
case 17:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_standalone_cluster); }}
break;
case 19:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_symbol_cluster); }}
break;
case 15:
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 16:
#line 116 "hb-ot-shaper-indic-machine.rl"
#line 114 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_non_indic_cluster); }}
break;
case 1:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 109 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }}
break;
case 3:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 110 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }}
break;
case 7:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }}
break;
case 8:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }}
break;
case 4:
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 6:
@ -555,19 +555,19 @@ _eof_trans:
case 18:
#line 1 "NONE"
{te = p+1;}
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 109 "hb-ot-shaper-indic-machine.rl"
{act = 1;}
break;
case 5:
#line 1 "NONE"
{te = p+1;}
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{act = 5;}
break;
case 12:
#line 1 "NONE"
{te = p+1;}
#line 116 "hb-ot-shaper-indic-machine.rl"
#line 114 "hb-ot-shaper-indic-machine.rl"
{act = 6;}
break;
#line 574 "hb-ot-shaper-indic-machine.hh"
@ -595,7 +595,7 @@ _again:
}
#line 148 "hb-ot-shaper-indic-machine.rl"
#line 146 "hb-ot-shaper-indic-machine.rl"
}

View File

@ -52,7 +52,6 @@
#define OT_VBlw K_Cat(VBlw)
#define OT_VPre K_Cat(VPre)
#define OT_VPst K_Cat(VPst)
#define OT_Coeng K_Cat(Coeng)
#define OT_Robatic K_Cat(Robatic)
#define OT_Xgroup K_Cat(Xgroup)
#define OT_Ygroup K_Cat(Ygroup)
@ -82,9 +81,8 @@ static_assert (OT_VPst == M_Cat(VPst), "");
#define _OT_C OT_C /* 518 chars; C */
#define _OT_CM OT_CM /* 1 chars; CM */
#define _OT_CS OT_CS /* 2 chars; CS */
#define _OT_Co OT_Coeng /* 2 chars; Coeng */
#define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */
#define _OT_H OT_H /* 10 chars; H */
#define _OT_H OT_H /* 12 chars; H */
#define _OT_M OT_M /* 160 chars; M */
#define _OT_MH OT_MH /* 1 chars; MH */
#define _OT_ML OT_ML /* 1 chars; ML */
@ -359,7 +357,7 @@ static const uint16_t indic_table[] = {
/* 1020 */ _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C),
/* 1028 */ _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), _(VA,T), _(VA,T), _(VB,B),
/* 1030 */ _(VB,B), _(VL,L), _(A,SM), _(VA,T), _(VA,T), _(VA,T), _(A,SM), _(N,X),
/* 1038 */_(SM,SM), _(Co,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C),
/* 1038 */_(SM,SM), _(H,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C),
/* 1040 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 1048 */ _(GB,C), _(GB,C), _(P,X), _(P,X), _(X,X), _(X,X), _(C,C), _(X,X),
/* 1050 */ _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R),
@ -388,7 +386,7 @@ static const uint16_t indic_table[] = {
/* 17B8 */ _(VA,T), _(VA,T), _(VA,T), _(VB,B), _(VB,B), _(VB,B), _(VA,T), _(VR,R),
/* 17C0 */ _(VR,R), _(VL,L), _(VL,L), _(VL,L), _(VR,R), _(VR,R), _(Xg,X), _(Yg,X),
/* 17C8 */ _(Yg,X), _(Rt,X), _(Rt,X), _(Xg,X), _(Rt,X), _(Xg,X), _(Xg,X), _(Xg,X),
/* 17D0 */ _(Xg,X), _(Xg,X), _(Co,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 17D0 */ _(Xg,X), _(Xg,X), _(H,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 17D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(S,SM), _(Yg,X), _(X,X), _(X,X),
/* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
@ -544,7 +542,6 @@ hb_indic_get_categories (hb_codepoint_t u)
#undef _OT_C
#undef _OT_CM
#undef _OT_CS
#undef _OT_Co
#undef _OT_DC
#undef _OT_H
#undef _OT_M

View File

@ -50,8 +50,8 @@ enum khmer_syllable_type_t {
#line 52 "hb-ot-shaper-khmer-machine.hh"
#define khmer_syllable_machine_ex_C 1u
#define khmer_syllable_machine_ex_Coeng 4u
#define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u
#define khmer_syllable_machine_ex_H 4u
#define khmer_syllable_machine_ex_PLACEHOLDER 10u
#define khmer_syllable_machine_ex_Ra 15u
#define khmer_syllable_machine_ex_Robatic 25u

View File

@ -55,8 +55,11 @@ enum khmer_syllable_type_t {
%%{
# Category H stands in for the spec's Coeng category.
export C = 1;
export V = 2;
export H = 4;
export ZWNJ = 5;
export ZWJ = 6;
export PLACEHOLDER = 10;
@ -68,7 +71,6 @@ export VBlw = 21;
export VPre = 22;
export VPst = 23;
export Coeng = 4;
export Robatic = 25;
export Xgroup = 26;
export Ygroup = 27;
@ -83,10 +85,10 @@ ygroup = Ygroup*;
# This grammar was experimentally extracted from what Uniscribe allows.
matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
syllable_tail = xgroup matra_group xgroup (H.c)? ygroup;
broken_cluster = (Coeng.cn)* (Coeng | syllable_tail);
broken_cluster = (H.cn)* (H | syllable_tail);
consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
other = any;

View File

@ -241,7 +241,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
* the 'pref' OpenType feature applied to them.
* """
*/
if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end)
if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end)
{
num_coengs++;

View File

@ -435,7 +435,7 @@ static const int myanmar_syllable_machine_en_main = 0;
#line 121 "hb-ot-shaper-myanmar-machine.rl"
#line 120 "hb-ot-shaper-myanmar-machine.rl"
#define found_syllable(syllable_type) \
@ -462,7 +462,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
act = 0;
}
#line 141 "hb-ot-shaper-myanmar-machine.rl"
#line 140 "hb-ot-shaper-myanmar-machine.rl"
p = 0;
@ -503,35 +503,35 @@ _eof_trans:
switch ( _myanmar_syllable_machine_trans_actions[_trans] ) {
case 6:
#line 113 "hb-ot-shaper-myanmar-machine.rl"
#line 112 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_consonant_syllable); }}
break;
case 4:
#line 114 "hb-ot-shaper-myanmar-machine.rl"
#line 113 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
case 10:
#line 115 "hb-ot-shaper-myanmar-machine.rl"
#line 114 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_punctuation_cluster); }}
break;
case 8:
#line 116 "hb-ot-shaper-myanmar-machine.rl"
#line 115 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 3:
#line 117 "hb-ot-shaper-myanmar-machine.rl"
#line 116 "hb-ot-shaper-myanmar-machine.rl"
{te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
case 5:
#line 113 "hb-ot-shaper-myanmar-machine.rl"
#line 112 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_consonant_syllable); }}
break;
case 7:
#line 116 "hb-ot-shaper-myanmar-machine.rl"
#line 115 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 9:
#line 117 "hb-ot-shaper-myanmar-machine.rl"
#line 116 "hb-ot-shaper-myanmar-machine.rl"
{te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }}
break;
#line 538 "hb-ot-shaper-myanmar-machine.hh"
@ -559,7 +559,7 @@ _again:
}
#line 149 "hb-ot-shaper-myanmar-machine.rl"
#line 148 "hb-ot-shaper-myanmar-machine.rl"
}