[indic-like] Remove category duplication

This commit is contained in:
Behdad Esfahbod 2022-06-11 04:20:23 -06:00
parent 0485192195
commit 607a9fe793
12 changed files with 162 additions and 180 deletions

View File

@ -82,6 +82,62 @@ del combined
# Convert categories & positions types
# Category names handled by each Indic-like shaper, keyed by shaper name.
#
# Each name is interpolated verbatim into the generated C++ lines
# (`#define OT_<name> <S>_Cat(<name>)` / `static_assert (OT_<name> == ...)`),
# so entries must be bare identifiers with no surrounding whitespace.
# A name that appears under more than one shaper (e.g. 'VAbv') is defined by
# the first shaper listing it and only cross-checked for the later ones.
categories = {
  'indic' : [
    'X',
    'C',
    'V',
    'N',
    'H',
    'ZWNJ',
    'ZWJ',
    'M',
    'SM',
    'A',
    'VD',
    'PLACEHOLDER',
    'DOTTEDCIRCLE',
    'RS',
    'Repha',
    'Ra',
    'CM',
    'Symbol',
    'CS',
  ],
  'khmer' : [
    'VAbv',
    'VBlw',
    'VPre',
    'VPst',
    'Coeng',
    'Robatic',
    'Xgroup',
    'Ygroup',
  ],
  'myanmar' : [
    'VAbv',
    'VBlw',
    'VPre',
    'VPst',
    'IV',
    'As',
    'D',
    'D0',
    'DB',
    'GB',  # was 'GB ' -- the trailing space leaked into the generated macro
    'MH',
    'MR',
    'MW',
    'MY',
    'PT',
    'VS',
    'P',
    'ML',
  ],
}
category_map = {
'Other' : 'X',
'Avagraha' : 'Symbol',
@ -456,6 +512,25 @@ print ('#ifndef HB_NO_OT_SHAPE')
print ()
print ('#include "hb-ot-shaper-indic.hh"')
print ()
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
print ()
# Print categories
#
# Emits, in order:
#   1. one `#include "hb-ot-shaper-<shaper>-machine.hh"` line per shaper;
#   2. for every shaper, a `/* <shaper> */` banner followed by one line per
#      category in that shaper's list.
#
# NOTE(review): indentation is not preserved in this view, so whether the bare
# `print ()` calls sit inside or after their loops cannot be confirmed here.
for shaper in categories:
print ('#include "hb-ot-shaper-%s-machine.hh"' % shaper)
print ()
# Categories already emitted as a #define, mapped to the prefix letter of the
# shaper that defined them.  Only membership is tested below.
done = {}
for shaper, shaper_cats in categories.items():
print ('/* %s */' % shaper)
for cat in shaper_cats:
# Macro prefix is the upper-cased first letter of the shaper name,
# e.g. 'indic' -> 'I', giving I_Cat(...).
v = shaper[0].upper()
if cat not in done:
# First shaper to list this category: it provides the OT_<cat> definition.
print ("#define OT_%s %s_Cat(%s)" % (cat, v, cat))
done[cat] = v
else:
# Category was already defined by an earlier shaper: emit a compile-time
# check that this shaper's value agrees instead of redefining the macro.
print ('static_assert (OT_%s == %s_Cat(%s), "");' % (cat, v, cat))
print ()
# Shorten values
short = [{
@ -493,8 +568,6 @@ for i in range (2):
what = ["OT", "POS"]
what_short = ["_OT", "_POS"]
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
cat_defs = []
for i in range (2):
vv = sorted (values[i].keys ())

View File

@ -70,11 +70,12 @@ enum indic_syllable_type_t {
#define indic_syllable_machine_ex_Symbol 17u
#define indic_syllable_machine_ex_V 2u
#define indic_syllable_machine_ex_VD 9u
#define indic_syllable_machine_ex_X 0u
#define indic_syllable_machine_ex_ZWJ 6u
#define indic_syllable_machine_ex_ZWNJ 5u
#line 78 "hb-ot-shaper-indic-machine.hh"
#line 79 "hb-ot-shaper-indic-machine.hh"
static const unsigned char _indic_syllable_machine_trans_keys[] = {
8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u,
4u, 12u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u,
@ -417,7 +418,7 @@ static const int indic_syllable_machine_en_main = 39;
#line 119 "hb-ot-shaper-indic-machine.rl"
#line 120 "hb-ot-shaper-indic-machine.rl"
#define found_syllable(syllable_type) \
@ -429,14 +430,14 @@ static const int indic_syllable_machine_en_main = 39;
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_indic (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act;
int cs;
hb_glyph_info_t *info = buffer->info;
#line 440 "hb-ot-shaper-indic-machine.hh"
#line 441 "hb-ot-shaper-indic-machine.hh"
{
cs = indic_syllable_machine_start;
ts = 0;
@ -444,7 +445,7 @@ find_syllables_indic (hb_buffer_t *buffer)
act = 0;
}
#line 139 "hb-ot-shaper-indic-machine.rl"
#line 140 "hb-ot-shaper-indic-machine.rl"
p = 0;
@ -452,7 +453,7 @@ find_syllables_indic (hb_buffer_t *buffer)
unsigned int syllable_serial = 1;
#line 456 "hb-ot-shaper-indic-machine.hh"
#line 457 "hb-ot-shaper-indic-machine.hh"
{
int _slen;
int _trans;
@ -466,7 +467,7 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
#line 470 "hb-ot-shaper-indic-machine.hh"
#line 471 "hb-ot-shaper-indic-machine.hh"
}
_keys = _indic_syllable_machine_trans_keys + (cs<<1);
@ -489,51 +490,51 @@ _eof_trans:
{te = p+1;}
break;
case 11:
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 116 "hb-ot-shaper-indic-machine.rl"
{te = p+1;{ found_syllable (indic_non_indic_cluster); }}
break;
case 13:
#line 110 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_consonant_syllable); }}
break;
case 14:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_vowel_syllable); }}
break;
case 17:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_standalone_cluster); }}
break;
case 19:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 114 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_symbol_cluster); }}
break;
case 15:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 115 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 16:
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 116 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_non_indic_cluster); }}
break;
case 1:
#line 110 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }}
break;
case 3:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }}
break;
case 7:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }}
break;
case 8:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 114 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }}
break;
case 4:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 115 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 6:
@ -554,22 +555,22 @@ _eof_trans:
case 18:
#line 1 "NONE"
{te = p+1;}
#line 110 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{act = 1;}
break;
case 5:
#line 1 "NONE"
{te = p+1;}
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 115 "hb-ot-shaper-indic-machine.rl"
{act = 5;}
break;
case 12:
#line 1 "NONE"
{te = p+1;}
#line 115 "hb-ot-shaper-indic-machine.rl"
#line 116 "hb-ot-shaper-indic-machine.rl"
{act = 6;}
break;
#line 573 "hb-ot-shaper-indic-machine.hh"
#line 574 "hb-ot-shaper-indic-machine.hh"
}
_again:
@ -578,7 +579,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
#line 582 "hb-ot-shaper-indic-machine.hh"
#line 583 "hb-ot-shaper-indic-machine.hh"
}
if ( ++p != pe )
@ -594,7 +595,7 @@ _again:
}
#line 147 "hb-ot-shaper-indic-machine.rl"
#line 148 "hb-ot-shaper-indic-machine.rl"
}

View File

@ -60,8 +60,7 @@ enum indic_syllable_type_t {
%%{
# These values are replicated from indic.hh, and relisted in indic.cc; keep in sync.
export X = 0;
export C = 1;
export V = 2;
export N = 3;
@ -127,7 +126,7 @@ main := |*
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_indic (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act;

View File

@ -23,6 +23,60 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-macros"
#include "hb-ot-shaper-indic-machine.hh"
#include "hb-ot-shaper-khmer-machine.hh"
#include "hb-ot-shaper-myanmar-machine.hh"
/* indic */
#define OT_X I_Cat(X)
#define OT_C I_Cat(C)
#define OT_V I_Cat(V)
#define OT_N I_Cat(N)
#define OT_H I_Cat(H)
#define OT_ZWNJ I_Cat(ZWNJ)
#define OT_ZWJ I_Cat(ZWJ)
#define OT_M I_Cat(M)
#define OT_SM I_Cat(SM)
#define OT_A I_Cat(A)
#define OT_VD I_Cat(VD)
#define OT_PLACEHOLDER I_Cat(PLACEHOLDER)
#define OT_DOTTEDCIRCLE I_Cat(DOTTEDCIRCLE)
#define OT_RS I_Cat(RS)
#define OT_Repha I_Cat(Repha)
#define OT_Ra I_Cat(Ra)
#define OT_CM I_Cat(CM)
#define OT_Symbol I_Cat(Symbol)
#define OT_CS I_Cat(CS)
/* khmer */
#define OT_VAbv K_Cat(VAbv)
#define OT_VBlw K_Cat(VBlw)
#define OT_VPre K_Cat(VPre)
#define OT_VPst K_Cat(VPst)
#define OT_Coeng K_Cat(Coeng)
#define OT_Robatic K_Cat(Robatic)
#define OT_Xgroup K_Cat(Xgroup)
#define OT_Ygroup K_Cat(Ygroup)
/* myanmar */
static_assert (OT_VAbv == M_Cat(VAbv), "");
static_assert (OT_VBlw == M_Cat(VBlw), "");
static_assert (OT_VPre == M_Cat(VPre), "");
static_assert (OT_VPst == M_Cat(VPst), "");
#define OT_IV M_Cat(IV)
#define OT_As M_Cat(As)
#define OT_D M_Cat(D)
#define OT_D0 M_Cat(D0)
#define OT_DB M_Cat(DB)
#define OT_GB M_Cat(GB )
#define OT_MH M_Cat(MH)
#define OT_MR M_Cat(MR)
#define OT_MW M_Cat(MW)
#define OT_MY M_Cat(MY)
#define OT_PT M_Cat(PT)
#define OT_VS M_Cat(VS)
#define OT_P M_Cat(P)
#define OT_ML M_Cat(ML)
#define _OT_A OT_A /* 53 chars; A */
#define _OT_As OT_As /* 1 chars; As */
#define _OT_C OT_C /* 518 chars; C */

View File

@ -39,29 +39,6 @@
*/
/* Compile-time consistency checks: for every category named below,
 * OT_<name> must equal I_Cat(<name>).  I_Cat is defined outside this
 * excerpt -- presumably it maps to the value exported by the indic
 * syllable machine; confirm against its definition.
 * I_Check is a local helper and is #undef'd once the list is done. */
#define I_Check(C) static_assert (OT_##C == I_Cat(C), "")
I_Check (C);
I_Check (V);
I_Check (N);
I_Check (H);
I_Check (ZWNJ);
I_Check (ZWJ);
I_Check (M);
I_Check (SM);
I_Check (A);
I_Check (VD);
I_Check (PLACEHOLDER);
I_Check (DOTTEDCIRCLE);
I_Check (RS);
I_Check (Repha);
I_Check (Ra);
I_Check (CM);
I_Check (Symbol);
I_Check (CS);
#undef I_Check
static inline void
set_indic_properties (hb_glyph_info_t &info)
{

View File

@ -32,67 +32,6 @@
#include "hb-ot-shaper-syllabic.hh"
/* Categories used in the OpenType spec:
* https://docs.microsoft.com/en-us/typography/script-development/devanagari
*/
/* Note: This enum is duplicated in the machine .rl files.
* We can avoid that by defining this enum in terms of those in the
* indic-table.cc file, but I like this enum duplicated here, because
* this gives us a unified view of all the numbers.
*
* The equality of these and the duplicated numbers is checked by way
* of static_asserts in the respective .cc shaper files. Keep those
* in sync as well. */
/* Numeric syllable-category codes.  Some names alias an earlier value. */
enum ot_category_t {
  OT_X            = 0,
  OT_C            = 1,
  OT_V            = 2,
  OT_N            = 3,
  OT_H            = 4,
  OT_ZWNJ         = 5,
  OT_ZWJ          = 6,
  OT_M            = 7,
  OT_SM           = 8,
  OT_A            = 9,
  OT_VD           = OT_A,
  OT_PLACEHOLDER  = 10,
  OT_DOTTEDCIRCLE = 11,
  OT_RS           = 12,             /* Register Shifter, used in Khmer OT spec. */
  OT_Repha        = 14,             /* Atomically-encoded logical or visual repha. */
  OT_Ra           = 15,
  OT_CM           = 16,             /* Consonant-Medial. */
  OT_Symbol       = 17,             /* Avagraha, etc that take marks (SM,A,VD). */
  OT_CS           = 18,

  /* Khmer & Myanmar shapers. */
  OT_VAbv         = 20,
  OT_VBlw         = 21,
  OT_VPre         = 22,
  OT_VPst         = 23,

  /* Khmer. */
  OT_Coeng        = OT_H,
  OT_Robatic      = 25,
  OT_Xgroup       = 26,
  OT_Ygroup       = 27,

  /* Myanmar. */
  OT_IV           = OT_V,
  OT_As           = 32,             /* Asat */
  OT_D            = 33,             /* Digits except zero */
  OT_D0           = 34,             /* Digit zero */
  OT_DB           = OT_N,           /* Dot below */
  OT_GB           = OT_PLACEHOLDER,
  OT_MH           = 35,             /* Medial Ha */
  OT_MR           = 36,             /* Medial Ra */
  OT_MW           = 37,             /* Medial Wa, Shan Wa */
  OT_MY           = 38,             /* Medial Ya, Mon Na, Mon Ma */
  OT_PT           = 39,             /* Pwo and other tones */
  OT_VS           = 40,             /* Variation selectors */
  OT_P            = 41,             /* Punctuation */
  OT_ML           = 42,             /* Medial Mon La */
};
/* Visual positions in a syllable from left to right. */
enum ot_position_t {
POS_START = 0,

View File

@ -271,7 +271,7 @@ static const int khmer_syllable_machine_en_main = 20;
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_khmer (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act HB_UNUSED;

View File

@ -55,8 +55,6 @@ enum khmer_syllable_type_t {
%%{
# These values are replicated from indic.hh, and relisted in khmer.cc; keep in sync.
export C = 1;
export V = 2;
export ZWNJ = 5;
@ -110,7 +108,7 @@ main := |*
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_khmer (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act HB_UNUSED;

View File

@ -38,29 +38,6 @@
*/
/* Compile-time consistency checks: for every category named below,
 * OT_<name> must equal K_Cat(<name>).  K_Cat is defined outside this
 * excerpt -- presumably it maps to the value exported by the khmer
 * syllable machine; confirm against its definition.
 * K_Check is a local helper and is #undef'd once the list is done. */
#define K_Check(C) static_assert (OT_##C == K_Cat(C), "")
K_Check (C);
K_Check (V);
K_Check (ZWNJ);
K_Check (ZWJ);
K_Check (PLACEHOLDER);
K_Check (DOTTEDCIRCLE);
K_Check (Ra);
K_Check (VAbv);
K_Check (VBlw);
K_Check (VPre);
K_Check (VPst);
K_Check (Coeng);
K_Check (Robatic);
K_Check (Xgroup);
K_Check (Ygroup);
#undef K_Check
static const hb_ot_map_feature_t
khmer_features[] =
{

View File

@ -447,7 +447,7 @@ static const int myanmar_syllable_machine_en_main = 0;
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_myanmar (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act HB_UNUSED;

View File

@ -58,8 +58,6 @@ enum myanmar_syllable_type_t {
%%{
# These values are replicated from indic.hh, and relisted in myanmar.cc; keep in sync.
export C = 1;
export IV = 2;
export DB = 3; # Dot below = OT_N
@ -78,6 +76,7 @@ export VBlw = 21;
export VPre = 22;
export VPst = 23;
# 32+ are for Myanmar-specific values
export As = 32; # Asat
export D = 33; # Digits except zero
export D0 = 34; # Digit zero
@ -129,7 +128,7 @@ main := |*
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
static void
inline void
find_syllables_myanmar (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act HB_UNUSED;

View File

@ -38,41 +38,6 @@
*/
/* Compile-time consistency checks: for every category named below,
 * OT_<name> must equal M_Cat(<name>).  M_Cat is defined outside this
 * excerpt -- presumably it maps to the value exported by the myanmar
 * syllable machine; confirm against its definition.
 * M_Check is a local helper and is #undef'd once the list is done. */
#define M_Check(C) static_assert (OT_##C == M_Cat(C), "")
M_Check (C);
M_Check (IV);
M_Check (DB);
M_Check (H);
M_Check (ZWNJ);
M_Check (ZWJ);
M_Check (SM);
M_Check (GB);
M_Check (DOTTEDCIRCLE);
M_Check (A);
M_Check (Ra);
M_Check (CS);
M_Check (VAbv);
M_Check (VBlw);
M_Check (VPre);
M_Check (VPst);
M_Check (As);
M_Check (D);
M_Check (D0);
M_Check (MH);
M_Check (MR);
M_Check (MW);
M_Check (MY);
M_Check (PT);
M_Check (VS);
M_Check (P);
M_Check (ML);
#undef M_Check
static const hb_tag_t
myanmar_basic_features[] =
{