[khmer] Rewrite grammar completely

Based on experimenting with Uniscribe to extract grammar and categories.

Failures down from 44 to 35:

KHMER: 299089 out of 299124 tests passed. 35 failed (0.0117008%)

We still don't enforce the one-matra rule pre-decomposition, but we do enforce
an order and the one-matra-per-position rule post-decomposition.

https://github.com/harfbuzz/harfbuzz/issues/667
This commit is contained in:
Behdad Esfahbod 2018-10-01 19:09:58 +02:00
parent aaaa65baa7
commit 5143654716
5 changed files with 266 additions and 173 deletions

View File

@ -125,7 +125,7 @@ enum indic_syllabic_category_t {
INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha,
INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM,
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_N,
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM,
INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS,
INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */
INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng,

View File

@ -34,130 +34,200 @@
#line 36 "hb-ot-shape-complex-khmer-machine.hh"
static const unsigned char _khmer_syllable_machine_trans_keys[] = {
7u, 7u, 1u, 16u, 13u, 13u, 1u, 16u, 7u, 13u, 7u, 7u, 1u, 16u, 13u, 13u,
1u, 16u, 7u, 13u, 1u, 16u, 3u, 14u, 3u, 14u, 5u, 14u, 3u, 14u, 5u, 14u,
8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u, 3u, 14u, 5u, 14u,
3u, 14u, 5u, 14u, 8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u,
3u, 14u, 7u, 13u, 7u, 7u, 1u, 16u, 0
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u,
5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u,
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 16u, 1u, 29u, 5u, 29u,
5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 5u, 26u,
5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 29u,
5u, 29u, 0
};
static const char _khmer_syllable_machine_key_spans[] = {
1, 16, 1, 16, 7, 1, 16, 1,
16, 7, 16, 12, 12, 10, 12, 10,
1, 11, 6, 1, 6, 12, 12, 10,
12, 10, 1, 11, 6, 1, 6, 12,
12, 7, 1, 16
22, 17, 22, 17, 16, 17, 22, 17,
22, 17, 16, 17, 22, 17, 16, 17,
22, 17, 22, 17, 22, 16, 29, 25,
25, 25, 1, 18, 25, 25, 25, 22,
25, 25, 1, 18, 25, 25, 16, 25,
25
};
static const short _khmer_syllable_machine_index_offsets[] = {
0, 2, 19, 21, 38, 46, 48, 65,
67, 84, 92, 109, 122, 135, 146, 159,
170, 172, 184, 191, 193, 200, 213, 226,
237, 250, 261, 263, 275, 282, 284, 291,
304, 317, 325, 327
0, 23, 41, 64, 82, 99, 117, 140,
158, 181, 199, 216, 234, 257, 275, 292,
310, 333, 351, 374, 392, 415, 432, 462,
488, 514, 540, 542, 561, 587, 613, 639,
662, 688, 714, 716, 735, 761, 787, 804,
830
};
static const char _khmer_syllable_machine_indicies[] = {
1, 0, 2, 2, 0, 0, 0, 0,
1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2,
3, 0, 0, 0, 0, 4, 0, 1,
1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 3,
0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 2, 0, 3, 0, 4, 4, 0,
0, 3, 0, 0, 0, 0, 4, 0,
5, 5, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 4, 0, 1, 0,
0, 0, 0, 0, 5, 0, 7, 6,
8, 8, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 8,
6, 9, 6, 10, 10, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 10, 6, 7, 6, 6, 6,
6, 6, 11, 6, 4, 4, 13, 12,
14, 15, 7, 16, 12, 12, 4, 4,
11, 17, 12, 4, 12, 19, 18, 20,
21, 1, 22, 18, 18, 18, 18, 5,
23, 18, 24, 18, 21, 21, 1, 22,
18, 18, 18, 18, 18, 23, 18, 21,
21, 1, 22, 18, 18, 18, 18, 18,
23, 18, 25, 18, 21, 21, 1, 22,
18, 18, 18, 18, 18, 26, 18, 21,
21, 1, 22, 18, 18, 18, 18, 18,
26, 18, 27, 18, 28, 18, 29, 18,
18, 22, 18, 18, 18, 18, 3, 18,
30, 18, 18, 18, 18, 22, 18, 22,
18, 28, 18, 18, 18, 18, 22, 18,
19, 18, 21, 21, 1, 22, 18, 18,
18, 18, 18, 23, 18, 32, 31, 33,
33, 7, 16, 31, 31, 31, 31, 31,
34, 31, 33, 33, 7, 16, 31, 31,
31, 31, 31, 34, 31, 35, 31, 33,
33, 7, 16, 31, 31, 31, 31, 31,
36, 31, 33, 33, 7, 16, 31, 31,
31, 31, 31, 36, 31, 37, 31, 38,
31, 39, 31, 31, 16, 31, 31, 31,
31, 9, 31, 40, 31, 31, 31, 31,
16, 31, 16, 31, 38, 31, 31, 31,
31, 16, 31, 13, 31, 41, 33, 7,
16, 31, 31, 31, 31, 11, 34, 31,
13, 31, 33, 33, 7, 16, 31, 31,
31, 31, 31, 34, 31, 7, 42, 42,
42, 42, 42, 11, 42, 7, 42, 10,
10, 42, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 42, 42, 10, 42,
4, 0, 6, 6, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 6, 0, 7, 7, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 8, 0, 9, 9, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 10, 0, 0,
0, 0, 4, 0, 9, 9, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 10, 0, 11, 11,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 12, 0,
0, 0, 0, 4, 0, 11, 11, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 12, 0, 13,
13, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 13, 0,
15, 15, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14,
16, 14, 15, 15, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 16, 17, 17, 17, 17, 18,
17, 19, 19, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17, 18, 17, 20, 20, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 20, 17, 21, 21, 17, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 22, 17, 23, 23,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 24, 17,
17, 17, 17, 18, 17, 23, 23, 17,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 24, 17, 25,
25, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 26,
17, 17, 17, 17, 18, 17, 25, 25,
17, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 26, 17,
15, 15, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 27,
16, 17, 17, 17, 17, 18, 17, 28,
28, 17, 17, 17, 17, 17, 17, 17,
17, 17, 17, 17, 17, 17, 28, 17,
13, 13, 29, 29, 30, 30, 29, 29,
29, 29, 2, 2, 29, 31, 29, 13,
29, 29, 29, 29, 16, 20, 29, 29,
29, 18, 24, 26, 22, 29, 33, 33,
32, 32, 32, 32, 32, 32, 32, 34,
32, 32, 32, 32, 32, 2, 3, 6,
32, 32, 32, 4, 10, 12, 8, 32,
35, 35, 32, 32, 32, 32, 32, 32,
32, 36, 32, 32, 32, 32, 32, 32,
3, 6, 32, 32, 32, 4, 10, 12,
8, 32, 5, 5, 32, 32, 32, 32,
32, 32, 32, 36, 32, 32, 32, 32,
32, 32, 4, 6, 32, 32, 32, 32,
32, 32, 8, 32, 6, 32, 7, 7,
32, 32, 32, 32, 32, 32, 32, 36,
32, 32, 32, 32, 32, 32, 8, 6,
32, 37, 37, 32, 32, 32, 32, 32,
32, 32, 36, 32, 32, 32, 32, 32,
32, 10, 6, 32, 32, 32, 4, 32,
32, 8, 32, 38, 38, 32, 32, 32,
32, 32, 32, 32, 36, 32, 32, 32,
32, 32, 32, 12, 6, 32, 32, 32,
4, 10, 32, 8, 32, 35, 35, 32,
32, 32, 32, 32, 32, 32, 34, 32,
32, 32, 32, 32, 32, 3, 6, 32,
32, 32, 4, 10, 12, 8, 32, 15,
15, 39, 39, 39, 39, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 16,
39, 39, 39, 39, 18, 39, 41, 41,
40, 40, 40, 40, 40, 40, 40, 42,
40, 40, 40, 40, 40, 40, 16, 20,
40, 40, 40, 18, 24, 26, 22, 40,
19, 19, 40, 40, 40, 40, 40, 40,
40, 42, 40, 40, 40, 40, 40, 40,
18, 20, 40, 40, 40, 40, 40, 40,
22, 40, 20, 40, 21, 21, 40, 40,
40, 40, 40, 40, 40, 42, 40, 40,
40, 40, 40, 40, 22, 20, 40, 43,
43, 40, 40, 40, 40, 40, 40, 40,
42, 40, 40, 40, 40, 40, 40, 24,
20, 40, 40, 40, 18, 40, 40, 22,
40, 44, 44, 40, 40, 40, 40, 40,
40, 40, 42, 40, 40, 40, 40, 40,
40, 26, 20, 40, 40, 40, 18, 24,
40, 22, 40, 28, 28, 39, 39, 39,
39, 39, 39, 39, 39, 39, 39, 39,
39, 39, 28, 39, 45, 45, 40, 40,
40, 40, 40, 40, 40, 46, 40, 40,
40, 40, 40, 27, 16, 20, 40, 40,
40, 18, 24, 26, 22, 40, 41, 41,
40, 40, 40, 40, 40, 40, 40, 46,
40, 40, 40, 40, 40, 40, 16, 20,
40, 40, 40, 18, 24, 26, 22, 40,
0
};
static const char _khmer_syllable_machine_trans_targs[] = {
10, 14, 17, 20, 11, 21, 10, 24,
27, 30, 31, 32, 10, 22, 33, 34,
26, 35, 10, 12, 4, 0, 16, 3,
13, 15, 1, 10, 18, 2, 19, 10,
23, 5, 8, 25, 6, 10, 28, 7,
29, 9, 10
22, 1, 30, 24, 25, 3, 26, 5,
27, 7, 28, 9, 29, 23, 22, 11,
32, 22, 33, 13, 34, 15, 35, 17,
36, 19, 37, 40, 39, 22, 31, 38,
22, 0, 10, 2, 4, 6, 8, 22,
22, 12, 14, 16, 18, 20, 21
};
static const char _khmer_syllable_machine_trans_actions[] = {
1, 2, 2, 0, 2, 2, 3, 2,
2, 0, 2, 2, 6, 2, 0, 0,
0, 0, 7, 2, 0, 0, 0, 0,
2, 2, 0, 8, 0, 0, 0, 9,
2, 0, 0, 2, 0, 10, 0, 0,
0, 0, 11
1, 0, 2, 2, 2, 0, 0, 0,
2, 0, 2, 0, 2, 2, 3, 0,
4, 5, 2, 0, 0, 0, 2, 0,
2, 0, 2, 4, 4, 8, 9, 0,
10, 0, 0, 0, 0, 0, 0, 11,
12, 0, 0, 0, 0, 0, 0
};
static const char _khmer_syllable_machine_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 4, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 6, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
0
};
static const char _khmer_syllable_machine_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 5, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 7, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
0
};
static const unsigned char _khmer_syllable_machine_eof_trans[] = {
1, 1, 1, 1, 1, 7, 7, 7,
7, 7, 0, 19, 19, 19, 19, 19,
19, 19, 19, 19, 19, 19, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32,
32, 43, 43, 43
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 15, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 0, 33,
33, 33, 33, 33, 33, 33, 33, 40,
41, 41, 41, 41, 41, 41, 40, 41,
41
};
static const int khmer_syllable_machine_start = 10;
static const int khmer_syllable_machine_first_final = 10;
static const int khmer_syllable_machine_start = 22;
static const int khmer_syllable_machine_first_final = 22;
static const int khmer_syllable_machine_error = -1;
static const int khmer_syllable_machine_en_main = 10;
static const int khmer_syllable_machine_en_main = 22;
#line 36 "hb-ot-shape-complex-khmer-machine.rl"
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
#line 80 "hb-ot-shape-complex-khmer-machine.rl"
#define found_syllable(syllable_type) \
@ -177,7 +247,7 @@ find_syllables (hb_buffer_t *buffer)
int cs;
hb_glyph_info_t *info = buffer->info;
#line 181 "hb-ot-shape-complex-khmer-machine.hh"
#line 251 "hb-ot-shape-complex-khmer-machine.hh"
{
cs = khmer_syllable_machine_start;
ts = 0;
@ -185,7 +255,7 @@ find_syllables (hb_buffer_t *buffer)
act = 0;
}
#line 95 "hb-ot-shape-complex-khmer-machine.rl"
#line 101 "hb-ot-shape-complex-khmer-machine.rl"
p = 0;
@ -194,7 +264,7 @@ find_syllables (hb_buffer_t *buffer)
unsigned int last = 0;
unsigned int syllable_serial = 1;
#line 198 "hb-ot-shape-complex-khmer-machine.hh"
#line 268 "hb-ot-shape-complex-khmer-machine.hh"
{
int _slen;
int _trans;
@ -204,11 +274,11 @@ find_syllables (hb_buffer_t *buffer)
goto _test_eof;
_resume:
switch ( _khmer_syllable_machine_from_state_actions[cs] ) {
case 5:
case 7:
#line 1 "NONE"
{ts = p;}
break;
#line 212 "hb-ot-shape-complex-khmer-machine.hh"
#line 282 "hb-ot-shape-complex-khmer-machine.hh"
}
_keys = _khmer_syllable_machine_trans_keys + (cs<<1);
@ -231,47 +301,63 @@ _eof_trans:
{te = p+1;}
break;
case 8:
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
{te = p+1;{ found_syllable (consonant_syllable); }}
break;
case 10:
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
{te = p+1;{ found_syllable (broken_cluster); }}
break;
case 6:
#line 70 "hb-ot-shape-complex-khmer-machine.rl"
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
{te = p+1;{ found_syllable (non_khmer_cluster); }}
break;
case 7:
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
case 10:
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (consonant_syllable); }}
break;
case 9:
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
case 12:
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }}
break;
case 11:
#line 70 "hb-ot-shape-complex-khmer-machine.rl"
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (non_khmer_cluster); }}
break;
case 1:
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
break;
case 3:
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
case 5:
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (broken_cluster); }}
break;
#line 266 "hb-ot-shape-complex-khmer-machine.hh"
case 3:
#line 1 "NONE"
{ switch( act ) {
case 2:
{{p = ((te))-1;} found_syllable (broken_cluster); }
break;
case 3:
{{p = ((te))-1;} found_syllable (non_khmer_cluster); }
break;
}
}
break;
case 4:
#line 1 "NONE"
{te = p+1;}
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
{act = 2;}
break;
case 9:
#line 1 "NONE"
{te = p+1;}
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
{act = 3;}
break;
#line 352 "hb-ot-shape-complex-khmer-machine.hh"
}
_again:
switch ( _khmer_syllable_machine_to_state_actions[cs] ) {
case 4:
case 6:
#line 1 "NONE"
{ts = 0;}
break;
#line 275 "hb-ot-shape-complex-khmer-machine.hh"
#line 361 "hb-ot-shape-complex-khmer-machine.hh"
}
if ( ++p != pe )
@ -287,7 +373,7 @@ _again:
}
#line 104 "hb-ot-shape-complex-khmer-machine.rl"
#line 110 "hb-ot-shape-complex-khmer-machine.rl"
}

View File

@ -40,28 +40,34 @@
# Same order as enum khmer_category_t. Not sure how to avoid duplication.
C = 1;
V = 2;
N = 3;
ZWNJ = 5;
ZWJ = 6;
M = 7;
SM = 8;
PLACEHOLDER = 11;
DOTTEDCIRCLE = 12;
RS = 13;
Coeng = 14;
Ra = 16;
Coeng= 14;
Ra = 16;
Robatic = 20;
Xgroup = 21;
Ygroup = 22;
VAbv = 26;
VBlw = 27;
VPre = 28;
VPst = 29;
c = (C | Ra | V); # is_consonant
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
z = ZWJ|ZWNJ; # is_joiner
c = (C | Ra | V);
cn = c.((ZWJ|ZWNJ)?.Robatic)?;
joiner = (ZWJ | ZWNJ);
xgroup = (joiner*.Xgroup)*;
ygroup = Ygroup*;
cn = c.n?;
matra_group = z?.M.N?;
syllable_tail = (SM.SM?)?;
# This grammar was experimentally extracted from what Uniscribe allows.
matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
broken_cluster = n? (Coeng.cn)* matra_group* (Coeng.cn)? syllable_tail;
consonant_syllable = (c|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
broken_cluster = (Coeng.cn)* syllable_tail;
consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
other = any;
main := |*

View File

@ -241,7 +241,6 @@ setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
hb_font_t *font HB_UNUSED)
{
HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
/* We cannot setup masks here. We save information about characters
* and setup masks later on in a pause-callback. */
@ -330,7 +329,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
}
/* Reorder left matra piece. */
else if (info[i].khmer_position() == POS_PRE_M)
else if (info[i].khmer_category() == OT_VPre)
{
/* Move to the start. */
buffer->merge_clusters (start, i + 1);
@ -432,7 +431,6 @@ reorder (const hb_ot_shape_plan_t *plan,
initial_reordering_syllable (plan, font->face, buffer, start, end);
HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
}
static void

View File

@ -34,30 +34,22 @@
/* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */
#define khmer_position() indic_position() /* khmer_position_t */
#define khmer_position() indic_position() /* indic_position_t */
typedef indic_category_t khmer_category_t;
typedef indic_position_t khmer_position_t;
static inline khmer_position_t
matra_position_khmer (khmer_position_t side)
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
enum khmer_category_t
{
switch ((int) side)
{
case POS_PRE_C:
return POS_PRE_M;
OT_Robatic = 20,
OT_Xgroup = 21,
OT_Ygroup = 22,
case POS_POST_C:
case POS_ABOVE_C:
case POS_BELOW_C:
return POS_AFTER_POST;
default:
return side;
};
}
OT_VAbv = 26,
OT_VBlw = 27,
OT_VPre = 28,
OT_VPst = 29,
};
static inline void
set_khmer_properties (hb_glyph_info_t &info)
@ -65,47 +57,58 @@ set_khmer_properties (hb_glyph_info_t &info)
hb_codepoint_t u = info.codepoint;
unsigned int type = hb_indic_get_categories (u);
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
khmer_position_t pos = (khmer_position_t) (type >> 8);
indic_position_t pos = (indic_position_t) (type >> 8);
/*
* Re-assign category
*
* These categories are experimentally extracted from what Uniscribe allows.
*/
if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
switch (u)
{
/* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
* https://github.com/roozbehp/unicode-data/issues/5 */
cat = OT_M;
pos = POS_ABOVE_C;
}
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
case 0x179Au:
cat = (khmer_category_t) OT_Ra;
break;
case 0x17CCu:
case 0x17C9u:
case 0x17CAu:
cat = OT_Robatic;
break;
case 0x17C6u:
case 0x17CBu:
case 0x17CDu:
case 0x17CEu:
case 0x17CFu:
case 0x17D0u:
case 0x17D1u:
cat = OT_Xgroup;
break;
case 0x17C7u:
case 0x17C8u:
case 0x17DDu:
case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
cat = OT_Ygroup;
break;
}
/*
* Re-assign position.
*/
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
{
pos = POS_BASE_C;
if (u == 0x179Au)
cat = OT_Ra;
}
else if (cat == OT_M)
{
pos = matra_position_khmer (pos);
}
else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
{
pos = POS_SMVD;
}
if (cat == (khmer_category_t) OT_M)
switch ((int) pos)
{
case POS_PRE_C: cat = OT_VPre; break;
case POS_BELOW_C: cat = OT_VBlw; break;
case POS_ABOVE_C: cat = OT_VAbv; break;
case POS_POST_C: cat = OT_VPst; break;
default: assert (0);
};
info.khmer_category() = cat;
info.khmer_position() = pos;
}