[khmer] Rewrite grammar completely
Based on experimenting with Uniscribe to extract grammar and categories. Failures down from 44 to 35: KHMER: 299089 out of 299124 tests passed. 35 failed (0.0117008%). We still don't enforce the one-matra rule pre-decomposition, but enforce an order and one-matra-per-position post-decomposition. See https://github.com/harfbuzz/harfbuzz/issues/667
This commit is contained in:
parent
aaaa65baa7
commit
5143654716
|
@ -125,7 +125,7 @@ enum indic_syllabic_category_t {
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_N,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM,
|
||||||
INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS,
|
INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS,
|
||||||
INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */
|
INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */
|
||||||
INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng,
|
INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng,
|
||||||
|
|
|
@ -34,130 +34,200 @@
|
||||||
|
|
||||||
#line 36 "hb-ot-shape-complex-khmer-machine.hh"
|
#line 36 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
static const unsigned char _khmer_syllable_machine_trans_keys[] = {
|
static const unsigned char _khmer_syllable_machine_trans_keys[] = {
|
||||||
7u, 7u, 1u, 16u, 13u, 13u, 1u, 16u, 7u, 13u, 7u, 7u, 1u, 16u, 13u, 13u,
|
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u,
|
||||||
1u, 16u, 7u, 13u, 1u, 16u, 3u, 14u, 3u, 14u, 5u, 14u, 3u, 14u, 5u, 14u,
|
5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u,
|
||||||
8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u, 3u, 14u, 5u, 14u,
|
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 16u, 1u, 29u, 5u, 29u,
|
||||||
3u, 14u, 5u, 14u, 8u, 8u, 3u, 13u, 3u, 8u, 8u, 8u, 3u, 8u, 3u, 14u,
|
5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 5u, 26u,
|
||||||
3u, 14u, 7u, 13u, 7u, 7u, 1u, 16u, 0
|
5u, 29u, 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 29u,
|
||||||
|
5u, 29u, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_key_spans[] = {
|
static const char _khmer_syllable_machine_key_spans[] = {
|
||||||
1, 16, 1, 16, 7, 1, 16, 1,
|
22, 17, 22, 17, 16, 17, 22, 17,
|
||||||
16, 7, 16, 12, 12, 10, 12, 10,
|
22, 17, 16, 17, 22, 17, 16, 17,
|
||||||
1, 11, 6, 1, 6, 12, 12, 10,
|
22, 17, 22, 17, 22, 16, 29, 25,
|
||||||
12, 10, 1, 11, 6, 1, 6, 12,
|
25, 25, 1, 18, 25, 25, 25, 22,
|
||||||
12, 7, 1, 16
|
25, 25, 1, 18, 25, 25, 16, 25,
|
||||||
|
25
|
||||||
};
|
};
|
||||||
|
|
||||||
static const short _khmer_syllable_machine_index_offsets[] = {
|
static const short _khmer_syllable_machine_index_offsets[] = {
|
||||||
0, 2, 19, 21, 38, 46, 48, 65,
|
0, 23, 41, 64, 82, 99, 117, 140,
|
||||||
67, 84, 92, 109, 122, 135, 146, 159,
|
158, 181, 199, 216, 234, 257, 275, 292,
|
||||||
170, 172, 184, 191, 193, 200, 213, 226,
|
310, 333, 351, 374, 392, 415, 432, 462,
|
||||||
237, 250, 261, 263, 275, 282, 284, 291,
|
488, 514, 540, 542, 561, 587, 613, 639,
|
||||||
304, 317, 325, 327
|
662, 688, 714, 716, 735, 761, 787, 804,
|
||||||
|
830
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_indicies[] = {
|
static const char _khmer_syllable_machine_indicies[] = {
|
||||||
1, 0, 2, 2, 0, 0, 0, 0,
|
1, 1, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 2,
|
||||||
|
3, 0, 0, 0, 0, 4, 0, 1,
|
||||||
|
1, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 3,
|
||||||
|
0, 1, 1, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 2, 0, 3, 0, 4, 4, 0,
|
0, 3, 0, 0, 0, 0, 4, 0,
|
||||||
|
5, 5, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 4, 0, 1, 0,
|
4, 0, 6, 6, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 5, 0, 7, 6,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
8, 8, 6, 6, 6, 6, 6, 6,
|
0, 6, 0, 7, 7, 0, 0, 0,
|
||||||
6, 6, 6, 6, 6, 6, 6, 8,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
6, 9, 6, 10, 10, 6, 6, 6,
|
0, 0, 0, 8, 0, 9, 9, 0,
|
||||||
6, 6, 6, 6, 6, 6, 6, 6,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
6, 6, 10, 6, 7, 6, 6, 6,
|
0, 0, 0, 0, 0, 10, 0, 0,
|
||||||
6, 6, 11, 6, 4, 4, 13, 12,
|
0, 0, 4, 0, 9, 9, 0, 0,
|
||||||
14, 15, 7, 16, 12, 12, 4, 4,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
11, 17, 12, 4, 12, 19, 18, 20,
|
0, 0, 0, 0, 10, 0, 11, 11,
|
||||||
21, 1, 22, 18, 18, 18, 18, 5,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
23, 18, 24, 18, 21, 21, 1, 22,
|
0, 0, 0, 0, 0, 0, 12, 0,
|
||||||
18, 18, 18, 18, 18, 23, 18, 21,
|
0, 0, 0, 4, 0, 11, 11, 0,
|
||||||
21, 1, 22, 18, 18, 18, 18, 18,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
23, 18, 25, 18, 21, 21, 1, 22,
|
0, 0, 0, 0, 0, 12, 0, 13,
|
||||||
18, 18, 18, 18, 18, 26, 18, 21,
|
13, 0, 0, 0, 0, 0, 0, 0,
|
||||||
21, 1, 22, 18, 18, 18, 18, 18,
|
0, 0, 0, 0, 0, 0, 13, 0,
|
||||||
26, 18, 27, 18, 28, 18, 29, 18,
|
15, 15, 14, 14, 14, 14, 14, 14,
|
||||||
18, 22, 18, 18, 18, 18, 3, 18,
|
14, 14, 14, 14, 14, 14, 14, 14,
|
||||||
30, 18, 18, 18, 18, 22, 18, 22,
|
16, 14, 15, 15, 17, 17, 17, 17,
|
||||||
18, 28, 18, 18, 18, 18, 22, 18,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
19, 18, 21, 21, 1, 22, 18, 18,
|
17, 17, 16, 17, 17, 17, 17, 18,
|
||||||
18, 18, 18, 23, 18, 32, 31, 33,
|
17, 19, 19, 17, 17, 17, 17, 17,
|
||||||
33, 7, 16, 31, 31, 31, 31, 31,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
34, 31, 33, 33, 7, 16, 31, 31,
|
17, 18, 17, 20, 20, 17, 17, 17,
|
||||||
31, 31, 31, 34, 31, 35, 31, 33,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
33, 7, 16, 31, 31, 31, 31, 31,
|
17, 17, 20, 17, 21, 21, 17, 17,
|
||||||
36, 31, 33, 33, 7, 16, 31, 31,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
31, 31, 31, 36, 31, 37, 31, 38,
|
17, 17, 17, 17, 22, 17, 23, 23,
|
||||||
31, 39, 31, 31, 16, 31, 31, 31,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
31, 9, 31, 40, 31, 31, 31, 31,
|
17, 17, 17, 17, 17, 17, 24, 17,
|
||||||
16, 31, 16, 31, 38, 31, 31, 31,
|
17, 17, 17, 18, 17, 23, 23, 17,
|
||||||
31, 16, 31, 13, 31, 41, 33, 7,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
16, 31, 31, 31, 31, 11, 34, 31,
|
17, 17, 17, 17, 17, 24, 17, 25,
|
||||||
13, 31, 33, 33, 7, 16, 31, 31,
|
25, 17, 17, 17, 17, 17, 17, 17,
|
||||||
31, 31, 31, 34, 31, 7, 42, 42,
|
17, 17, 17, 17, 17, 17, 17, 26,
|
||||||
42, 42, 42, 11, 42, 7, 42, 10,
|
17, 17, 17, 17, 18, 17, 25, 25,
|
||||||
10, 42, 42, 42, 42, 42, 42, 42,
|
17, 17, 17, 17, 17, 17, 17, 17,
|
||||||
42, 42, 42, 42, 42, 42, 10, 42,
|
17, 17, 17, 17, 17, 17, 26, 17,
|
||||||
|
15, 15, 17, 17, 17, 17, 17, 17,
|
||||||
|
17, 17, 17, 17, 17, 17, 17, 27,
|
||||||
|
16, 17, 17, 17, 17, 18, 17, 28,
|
||||||
|
28, 17, 17, 17, 17, 17, 17, 17,
|
||||||
|
17, 17, 17, 17, 17, 17, 28, 17,
|
||||||
|
13, 13, 29, 29, 30, 30, 29, 29,
|
||||||
|
29, 29, 2, 2, 29, 31, 29, 13,
|
||||||
|
29, 29, 29, 29, 16, 20, 29, 29,
|
||||||
|
29, 18, 24, 26, 22, 29, 33, 33,
|
||||||
|
32, 32, 32, 32, 32, 32, 32, 34,
|
||||||
|
32, 32, 32, 32, 32, 2, 3, 6,
|
||||||
|
32, 32, 32, 4, 10, 12, 8, 32,
|
||||||
|
35, 35, 32, 32, 32, 32, 32, 32,
|
||||||
|
32, 36, 32, 32, 32, 32, 32, 32,
|
||||||
|
3, 6, 32, 32, 32, 4, 10, 12,
|
||||||
|
8, 32, 5, 5, 32, 32, 32, 32,
|
||||||
|
32, 32, 32, 36, 32, 32, 32, 32,
|
||||||
|
32, 32, 4, 6, 32, 32, 32, 32,
|
||||||
|
32, 32, 8, 32, 6, 32, 7, 7,
|
||||||
|
32, 32, 32, 32, 32, 32, 32, 36,
|
||||||
|
32, 32, 32, 32, 32, 32, 8, 6,
|
||||||
|
32, 37, 37, 32, 32, 32, 32, 32,
|
||||||
|
32, 32, 36, 32, 32, 32, 32, 32,
|
||||||
|
32, 10, 6, 32, 32, 32, 4, 32,
|
||||||
|
32, 8, 32, 38, 38, 32, 32, 32,
|
||||||
|
32, 32, 32, 32, 36, 32, 32, 32,
|
||||||
|
32, 32, 32, 12, 6, 32, 32, 32,
|
||||||
|
4, 10, 32, 8, 32, 35, 35, 32,
|
||||||
|
32, 32, 32, 32, 32, 32, 34, 32,
|
||||||
|
32, 32, 32, 32, 32, 3, 6, 32,
|
||||||
|
32, 32, 4, 10, 12, 8, 32, 15,
|
||||||
|
15, 39, 39, 39, 39, 39, 39, 39,
|
||||||
|
39, 39, 39, 39, 39, 39, 39, 16,
|
||||||
|
39, 39, 39, 39, 18, 39, 41, 41,
|
||||||
|
40, 40, 40, 40, 40, 40, 40, 42,
|
||||||
|
40, 40, 40, 40, 40, 40, 16, 20,
|
||||||
|
40, 40, 40, 18, 24, 26, 22, 40,
|
||||||
|
19, 19, 40, 40, 40, 40, 40, 40,
|
||||||
|
40, 42, 40, 40, 40, 40, 40, 40,
|
||||||
|
18, 20, 40, 40, 40, 40, 40, 40,
|
||||||
|
22, 40, 20, 40, 21, 21, 40, 40,
|
||||||
|
40, 40, 40, 40, 40, 42, 40, 40,
|
||||||
|
40, 40, 40, 40, 22, 20, 40, 43,
|
||||||
|
43, 40, 40, 40, 40, 40, 40, 40,
|
||||||
|
42, 40, 40, 40, 40, 40, 40, 24,
|
||||||
|
20, 40, 40, 40, 18, 40, 40, 22,
|
||||||
|
40, 44, 44, 40, 40, 40, 40, 40,
|
||||||
|
40, 40, 42, 40, 40, 40, 40, 40,
|
||||||
|
40, 26, 20, 40, 40, 40, 18, 24,
|
||||||
|
40, 22, 40, 28, 28, 39, 39, 39,
|
||||||
|
39, 39, 39, 39, 39, 39, 39, 39,
|
||||||
|
39, 39, 28, 39, 45, 45, 40, 40,
|
||||||
|
40, 40, 40, 40, 40, 46, 40, 40,
|
||||||
|
40, 40, 40, 27, 16, 20, 40, 40,
|
||||||
|
40, 18, 24, 26, 22, 40, 41, 41,
|
||||||
|
40, 40, 40, 40, 40, 40, 40, 46,
|
||||||
|
40, 40, 40, 40, 40, 40, 16, 20,
|
||||||
|
40, 40, 40, 18, 24, 26, 22, 40,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_trans_targs[] = {
|
static const char _khmer_syllable_machine_trans_targs[] = {
|
||||||
10, 14, 17, 20, 11, 21, 10, 24,
|
22, 1, 30, 24, 25, 3, 26, 5,
|
||||||
27, 30, 31, 32, 10, 22, 33, 34,
|
27, 7, 28, 9, 29, 23, 22, 11,
|
||||||
26, 35, 10, 12, 4, 0, 16, 3,
|
32, 22, 33, 13, 34, 15, 35, 17,
|
||||||
13, 15, 1, 10, 18, 2, 19, 10,
|
36, 19, 37, 40, 39, 22, 31, 38,
|
||||||
23, 5, 8, 25, 6, 10, 28, 7,
|
22, 0, 10, 2, 4, 6, 8, 22,
|
||||||
29, 9, 10
|
22, 12, 14, 16, 18, 20, 21
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_trans_actions[] = {
|
static const char _khmer_syllable_machine_trans_actions[] = {
|
||||||
1, 2, 2, 0, 2, 2, 3, 2,
|
1, 0, 2, 2, 2, 0, 0, 0,
|
||||||
2, 0, 2, 2, 6, 2, 0, 0,
|
2, 0, 2, 0, 2, 2, 3, 0,
|
||||||
0, 0, 7, 2, 0, 0, 0, 0,
|
4, 5, 2, 0, 0, 0, 2, 0,
|
||||||
2, 2, 0, 8, 0, 0, 0, 9,
|
2, 0, 2, 4, 4, 8, 9, 0,
|
||||||
2, 0, 0, 2, 0, 10, 0, 0,
|
10, 0, 0, 0, 0, 0, 0, 11,
|
||||||
0, 0, 11
|
12, 0, 0, 0, 0, 0, 0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_to_state_actions[] = {
|
static const char _khmer_syllable_machine_to_state_actions[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 4, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 6, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char _khmer_syllable_machine_from_state_actions[] = {
|
static const char _khmer_syllable_machine_from_state_actions[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 5, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 7, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
0, 0, 0, 0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char _khmer_syllable_machine_eof_trans[] = {
|
static const unsigned char _khmer_syllable_machine_eof_trans[] = {
|
||||||
1, 1, 1, 1, 1, 7, 7, 7,
|
1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
7, 7, 0, 19, 19, 19, 19, 19,
|
1, 1, 1, 15, 18, 18, 18, 18,
|
||||||
19, 19, 19, 19, 19, 19, 32, 32,
|
18, 18, 18, 18, 18, 18, 0, 33,
|
||||||
32, 32, 32, 32, 32, 32, 32, 32,
|
33, 33, 33, 33, 33, 33, 33, 40,
|
||||||
32, 43, 43, 43
|
41, 41, 41, 41, 41, 41, 40, 41,
|
||||||
|
41
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int khmer_syllable_machine_start = 10;
|
static const int khmer_syllable_machine_start = 22;
|
||||||
static const int khmer_syllable_machine_first_final = 10;
|
static const int khmer_syllable_machine_first_final = 22;
|
||||||
static const int khmer_syllable_machine_error = -1;
|
static const int khmer_syllable_machine_error = -1;
|
||||||
|
|
||||||
static const int khmer_syllable_machine_en_main = 10;
|
static const int khmer_syllable_machine_en_main = 22;
|
||||||
|
|
||||||
|
|
||||||
#line 36 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 36 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 80 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
|
||||||
|
|
||||||
#define found_syllable(syllable_type) \
|
#define found_syllable(syllable_type) \
|
||||||
|
@ -177,7 +247,7 @@ find_syllables (hb_buffer_t *buffer)
|
||||||
int cs;
|
int cs;
|
||||||
hb_glyph_info_t *info = buffer->info;
|
hb_glyph_info_t *info = buffer->info;
|
||||||
|
|
||||||
#line 181 "hb-ot-shape-complex-khmer-machine.hh"
|
#line 251 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
{
|
{
|
||||||
cs = khmer_syllable_machine_start;
|
cs = khmer_syllable_machine_start;
|
||||||
ts = 0;
|
ts = 0;
|
||||||
|
@ -185,7 +255,7 @@ find_syllables (hb_buffer_t *buffer)
|
||||||
act = 0;
|
act = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#line 95 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 101 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
|
||||||
|
|
||||||
p = 0;
|
p = 0;
|
||||||
|
@ -194,7 +264,7 @@ find_syllables (hb_buffer_t *buffer)
|
||||||
unsigned int last = 0;
|
unsigned int last = 0;
|
||||||
unsigned int syllable_serial = 1;
|
unsigned int syllable_serial = 1;
|
||||||
|
|
||||||
#line 198 "hb-ot-shape-complex-khmer-machine.hh"
|
#line 268 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
{
|
{
|
||||||
int _slen;
|
int _slen;
|
||||||
int _trans;
|
int _trans;
|
||||||
|
@ -204,11 +274,11 @@ find_syllables (hb_buffer_t *buffer)
|
||||||
goto _test_eof;
|
goto _test_eof;
|
||||||
_resume:
|
_resume:
|
||||||
switch ( _khmer_syllable_machine_from_state_actions[cs] ) {
|
switch ( _khmer_syllable_machine_from_state_actions[cs] ) {
|
||||||
case 5:
|
case 7:
|
||||||
#line 1 "NONE"
|
#line 1 "NONE"
|
||||||
{ts = p;}
|
{ts = p;}
|
||||||
break;
|
break;
|
||||||
#line 212 "hb-ot-shape-complex-khmer-machine.hh"
|
#line 282 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
}
|
}
|
||||||
|
|
||||||
_keys = _khmer_syllable_machine_trans_keys + (cs<<1);
|
_keys = _khmer_syllable_machine_trans_keys + (cs<<1);
|
||||||
|
@ -231,47 +301,63 @@ _eof_trans:
|
||||||
{te = p+1;}
|
{te = p+1;}
|
||||||
break;
|
break;
|
||||||
case 8:
|
case 8:
|
||||||
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{te = p+1;{ found_syllable (consonant_syllable); }}
|
|
||||||
break;
|
|
||||||
case 10:
|
|
||||||
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
|
|
||||||
{te = p+1;{ found_syllable (broken_cluster); }}
|
|
||||||
break;
|
|
||||||
case 6:
|
|
||||||
#line 70 "hb-ot-shape-complex-khmer-machine.rl"
|
|
||||||
{te = p+1;{ found_syllable (non_khmer_cluster); }}
|
{te = p+1;{ found_syllable (non_khmer_cluster); }}
|
||||||
break;
|
break;
|
||||||
case 7:
|
case 10:
|
||||||
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{te = p;p--;{ found_syllable (consonant_syllable); }}
|
{te = p;p--;{ found_syllable (consonant_syllable); }}
|
||||||
break;
|
break;
|
||||||
case 9:
|
case 12:
|
||||||
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{te = p;p--;{ found_syllable (broken_cluster); }}
|
{te = p;p--;{ found_syllable (broken_cluster); }}
|
||||||
break;
|
break;
|
||||||
case 11:
|
case 11:
|
||||||
#line 70 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{te = p;p--;{ found_syllable (non_khmer_cluster); }}
|
{te = p;p--;{ found_syllable (non_khmer_cluster); }}
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
#line 68 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 74 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
|
{{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 5:
|
||||||
#line 69 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
{{p = ((te))-1;}{ found_syllable (broken_cluster); }}
|
{{p = ((te))-1;}{ found_syllable (broken_cluster); }}
|
||||||
break;
|
break;
|
||||||
#line 266 "hb-ot-shape-complex-khmer-machine.hh"
|
case 3:
|
||||||
|
#line 1 "NONE"
|
||||||
|
{ switch( act ) {
|
||||||
|
case 2:
|
||||||
|
{{p = ((te))-1;} found_syllable (broken_cluster); }
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
{{p = ((te))-1;} found_syllable (non_khmer_cluster); }
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
#line 1 "NONE"
|
||||||
|
{te = p+1;}
|
||||||
|
#line 75 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
{act = 2;}
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
#line 1 "NONE"
|
||||||
|
{te = p+1;}
|
||||||
|
#line 76 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
{act = 3;}
|
||||||
|
break;
|
||||||
|
#line 352 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
}
|
}
|
||||||
|
|
||||||
_again:
|
_again:
|
||||||
switch ( _khmer_syllable_machine_to_state_actions[cs] ) {
|
switch ( _khmer_syllable_machine_to_state_actions[cs] ) {
|
||||||
case 4:
|
case 6:
|
||||||
#line 1 "NONE"
|
#line 1 "NONE"
|
||||||
{ts = 0;}
|
{ts = 0;}
|
||||||
break;
|
break;
|
||||||
#line 275 "hb-ot-shape-complex-khmer-machine.hh"
|
#line 361 "hb-ot-shape-complex-khmer-machine.hh"
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ++p != pe )
|
if ( ++p != pe )
|
||||||
|
@ -287,7 +373,7 @@ _again:
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#line 104 "hb-ot-shape-complex-khmer-machine.rl"
|
#line 110 "hb-ot-shape-complex-khmer-machine.rl"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,28 +40,34 @@
|
||||||
# Same order as enum khmer_category_t. Not sure how to avoid duplication.
|
# Same order as enum khmer_category_t. Not sure how to avoid duplication.
|
||||||
C = 1;
|
C = 1;
|
||||||
V = 2;
|
V = 2;
|
||||||
N = 3;
|
|
||||||
ZWNJ = 5;
|
ZWNJ = 5;
|
||||||
ZWJ = 6;
|
ZWJ = 6;
|
||||||
M = 7;
|
|
||||||
SM = 8;
|
|
||||||
PLACEHOLDER = 11;
|
PLACEHOLDER = 11;
|
||||||
DOTTEDCIRCLE = 12;
|
DOTTEDCIRCLE = 12;
|
||||||
RS = 13;
|
|
||||||
Coeng= 14;
|
Coeng= 14;
|
||||||
Ra = 16;
|
Ra = 16;
|
||||||
|
Robatic = 20;
|
||||||
|
Xgroup = 21;
|
||||||
|
Ygroup = 22;
|
||||||
|
VAbv = 26;
|
||||||
|
VBlw = 27;
|
||||||
|
VPre = 28;
|
||||||
|
VPst = 29;
|
||||||
|
|
||||||
c = (C | Ra | V); # is_consonant
|
c = (C | Ra | V);
|
||||||
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
|
cn = c.((ZWJ|ZWNJ)?.Robatic)?;
|
||||||
z = ZWJ|ZWNJ; # is_joiner
|
joiner = (ZWJ | ZWNJ);
|
||||||
|
xgroup = (joiner*.Xgroup)*;
|
||||||
|
ygroup = Ygroup*;
|
||||||
|
|
||||||
cn = c.n?;
|
# This grammar was experimentally extracted from what Uniscribe allows.
|
||||||
matra_group = z?.M.N?;
|
|
||||||
syllable_tail = (SM.SM?)?;
|
matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?;
|
||||||
|
syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup;
|
||||||
|
|
||||||
|
|
||||||
broken_cluster = n? (Coeng.cn)* matra_group* (Coeng.cn)? syllable_tail;
|
broken_cluster = (Coeng.cn)* syllable_tail;
|
||||||
consonant_syllable = (c|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
|
consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster;
|
||||||
other = any;
|
other = any;
|
||||||
|
|
||||||
main := |*
|
main := |*
|
||||||
|
|
|
@ -241,7 +241,6 @@ setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
|
||||||
hb_font_t *font HB_UNUSED)
|
hb_font_t *font HB_UNUSED)
|
||||||
{
|
{
|
||||||
HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
|
HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
|
||||||
HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
|
|
||||||
|
|
||||||
/* We cannot setup masks here. We save information about characters
|
/* We cannot setup masks here. We save information about characters
|
||||||
* and setup masks later on in a pause-callback. */
|
* and setup masks later on in a pause-callback. */
|
||||||
|
@ -330,7 +329,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reorder left matra piece. */
|
/* Reorder left matra piece. */
|
||||||
else if (info[i].khmer_position() == POS_PRE_M)
|
else if (info[i].khmer_category() == OT_VPre)
|
||||||
{
|
{
|
||||||
/* Move to the start. */
|
/* Move to the start. */
|
||||||
buffer->merge_clusters (start, i + 1);
|
buffer->merge_clusters (start, i + 1);
|
||||||
|
@ -432,7 +431,6 @@ reorder (const hb_ot_shape_plan_t *plan,
|
||||||
initial_reordering_syllable (plan, font->face, buffer, start, end);
|
initial_reordering_syllable (plan, font->face, buffer, start, end);
|
||||||
|
|
||||||
HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
|
HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
|
||||||
HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
@ -34,30 +34,22 @@
|
||||||
|
|
||||||
/* buffer var allocations */
|
/* buffer var allocations */
|
||||||
#define khmer_category() indic_category() /* khmer_category_t */
|
#define khmer_category() indic_category() /* khmer_category_t */
|
||||||
#define khmer_position() indic_position() /* khmer_position_t */
|
#define khmer_position() indic_position() /* indic_position_t */
|
||||||
|
|
||||||
|
|
||||||
typedef indic_category_t khmer_category_t;
|
/* Note: This enum is duplicated in the -machine.rl source file.
|
||||||
typedef indic_position_t khmer_position_t;
|
* Not sure how to avoid duplication. */
|
||||||
|
enum khmer_category_t
|
||||||
|
|
||||||
static inline khmer_position_t
|
|
||||||
matra_position_khmer (khmer_position_t side)
|
|
||||||
{
|
{
|
||||||
switch ((int) side)
|
OT_Robatic = 20,
|
||||||
{
|
OT_Xgroup = 21,
|
||||||
case POS_PRE_C:
|
OT_Ygroup = 22,
|
||||||
return POS_PRE_M;
|
|
||||||
|
|
||||||
case POS_POST_C:
|
OT_VAbv = 26,
|
||||||
case POS_ABOVE_C:
|
OT_VBlw = 27,
|
||||||
case POS_BELOW_C:
|
OT_VPre = 28,
|
||||||
return POS_AFTER_POST;
|
OT_VPst = 29,
|
||||||
|
|
||||||
default:
|
|
||||||
return side;
|
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
set_khmer_properties (hb_glyph_info_t &info)
|
set_khmer_properties (hb_glyph_info_t &info)
|
||||||
|
@ -65,47 +57,58 @@ set_khmer_properties (hb_glyph_info_t &info)
|
||||||
hb_codepoint_t u = info.codepoint;
|
hb_codepoint_t u = info.codepoint;
|
||||||
unsigned int type = hb_indic_get_categories (u);
|
unsigned int type = hb_indic_get_categories (u);
|
||||||
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
|
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
|
||||||
khmer_position_t pos = (khmer_position_t) (type >> 8);
|
indic_position_t pos = (indic_position_t) (type >> 8);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Re-assign category
|
* Re-assign category
|
||||||
|
*
|
||||||
|
* These categories are experimentally extracted from what Uniscribe allows.
|
||||||
*/
|
*/
|
||||||
|
switch (u)
|
||||||
if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
|
|
||||||
u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
|
|
||||||
{
|
{
|
||||||
/* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
|
case 0x179Au:
|
||||||
* https://github.com/roozbehp/unicode-data/issues/5 */
|
cat = (khmer_category_t) OT_Ra;
|
||||||
cat = OT_M;
|
break;
|
||||||
pos = POS_ABOVE_C;
|
|
||||||
}
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
|
|
||||||
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
|
|
||||||
|
|
||||||
|
case 0x17CCu:
|
||||||
|
case 0x17C9u:
|
||||||
|
case 0x17CAu:
|
||||||
|
cat = OT_Robatic;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0x17C6u:
|
||||||
|
case 0x17CBu:
|
||||||
|
case 0x17CDu:
|
||||||
|
case 0x17CEu:
|
||||||
|
case 0x17CFu:
|
||||||
|
case 0x17D0u:
|
||||||
|
case 0x17D1u:
|
||||||
|
cat = OT_Xgroup;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0x17C7u:
|
||||||
|
case 0x17C8u:
|
||||||
|
case 0x17DDu:
|
||||||
|
case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
|
||||||
|
cat = OT_Ygroup;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Re-assign position.
|
* Re-assign position.
|
||||||
*/
|
*/
|
||||||
|
if (cat == (khmer_category_t) OT_M)
|
||||||
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
switch ((int) pos)
|
||||||
{
|
{
|
||||||
pos = POS_BASE_C;
|
case POS_PRE_C: cat = OT_VPre; break;
|
||||||
if (u == 0x179Au)
|
case POS_BELOW_C: cat = OT_VBlw; break;
|
||||||
cat = OT_Ra;
|
case POS_ABOVE_C: cat = OT_VAbv; break;
|
||||||
}
|
case POS_POST_C: cat = OT_VPst; break;
|
||||||
else if (cat == OT_M)
|
default: assert (0);
|
||||||
{
|
};
|
||||||
pos = matra_position_khmer (pos);
|
|
||||||
}
|
|
||||||
else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
|
|
||||||
{
|
|
||||||
pos = POS_SMVD;
|
|
||||||
}
|
|
||||||
|
|
||||||
info.khmer_category() = cat;
|
info.khmer_category() = cat;
|
||||||
info.khmer_position() = pos;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue