[indic] Remove remnants of Sinhala

This commit is contained in:
David Corbett 2022-06-25 13:32:04 -04:00
parent 0cc948b96c
commit 78c5ae3979
9 changed files with 366 additions and 509 deletions

View File

@ -219,7 +219,7 @@ notes [fontname="Verdana",shape=box,label=<<table border="0" cellborder="0" cell
<b>Indic</b> scripts are: Bengali, Devanagari,
Gujarati, Gurmukhi, Kannada,
Malayalam, Oriya, Tamil,
Telugu, Sinhala
Telugu
</td></tr>
<tr><td align="left">
@ -240,7 +240,7 @@ Mongolian, Multani, Nandinagari, Newa, Nko, Nyiakeng Puachue Hmong,
Old Sogdian, Pahawh Hmong, Phags Pa, Psalter Pahlavi, Rejang,
</td></tr>
<tr><td align="left">
Saurashtra, Sharada, Siddham, Sogdian, Soyombo, Sundanese,
Saurashtra, Sharada, Siddham, Sinhala, Sogdian, Soyombo, Sundanese,
Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet,
</td></tr>
<tr><td align="left">

View File

@ -237,7 +237,7 @@
<para>
The <emphasis>Indic</emphasis> shaping model handles the Indic
scripts Bengali, Devanagari, Gujarati, Gurmukhi, Kannada,
Malayalam, Oriya, Tamil, Telugu, and Sinhala.
Malayalam, Oriya, Tamil, and Telugu.
</para>
<para>
The Indic shaping model was revised significantly in

View File

@ -237,8 +237,7 @@
<listitem>
<para>
Indic (covering Devanagari, Bengali, Gujarati,
Gurmukhi, Kannada, Malayalam, Oriya, Tamil, Telugu, and
Sinhala)
Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and Telugu)
</para>
</listitem>
<listitem>

View File

@ -26,7 +26,6 @@ ALLOWED_BLOCKS = [
'Telugu',
'Kannada',
'Malayalam',
'Sinhala',
'Myanmar',
'Khmer',
'Vedic Extensions',
@ -236,7 +235,6 @@ category_overrides = {
0x0C30: 'Ra', # Telugu Reph formed only with ZWJ
0x0CB0: 'Ra', # Kannada
0x0D30: 'Ra', # Malayalam No Reph, Logical Repha
0x0DBB: 'Ra', # Sinhala Reph formed only with ZWJ
# The following act more like the Bindus.
0x0953: 'SM',
@ -392,7 +390,6 @@ def matra_pos_right(u, block):
if block == 'Telugu': return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB'
if block == 'Kannada': return 'BEFORE_SUB' if u < 0x0CC3 or u > 0x0CD6 else 'AFTER_SUB'
if block == 'Malayalam': return 'AFTER_POST'
if block == 'Sinhala': return 'AFTER_SUB'
return 'AFTER_SUB'
def matra_pos_top(u, block):
# BENG and MLYM don't have top matras.
@ -403,7 +400,6 @@ def matra_pos_top(u, block):
if block == 'Tamil': return 'AFTER_SUB'
if block == 'Telugu': return 'BEFORE_SUB'
if block == 'Kannada': return 'BEFORE_SUB'
if block == 'Sinhala': return 'AFTER_SUB'
return 'AFTER_SUB'
def matra_pos_bottom(u, block):
if block == 'Devanagari': return 'AFTER_SUB'
@ -415,7 +411,6 @@ def matra_pos_bottom(u, block):
if block == 'Telugu': return 'BEFORE_SUB'
if block == 'Kannada': return 'BEFORE_SUB'
if block == 'Malayalam': return 'AFTER_POST'
if block == 'Sinhala': return 'AFTER_SUB'
return "AFTER_SUB"
def indic_matra_position(u, pos, block): # Reposition matra
if pos == 'PRE_C': return matra_pos_left(u, block)

View File

@ -77,279 +77,266 @@ enum indic_syllable_type_t {
#line 79 "hb-ot-shaper-indic-machine.hh"
static const unsigned char _indic_syllable_machine_trans_keys[] = {
8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u,
4u, 12u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u,
4u, 8u, 4u, 12u, 4u, 12u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u,
6u, 6u, 15u, 15u, 4u, 8u, 4u, 8u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u,
4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u, 4u, 8u, 5u, 8u, 8u, 8u, 1u, 18u,
3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u,
1u, 15u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u,
3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u,
3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u,
1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u,
5u, 9u, 3u, 16u, 3u, 16u, 4u, 8u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u,
3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u,
4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u,
3u, 16u, 4u, 12u, 4u, 8u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u,
1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u,
5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 1u, 16u,
3u, 16u, 1u, 16u, 4u, 12u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 3u, 9u,
5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 0
8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 4u, 12u, 4u, 8u, 8u, 8u,
5u, 7u, 5u, 8u, 4u, 8u, 4u, 12u, 4u, 12u, 4u, 12u, 8u, 8u, 5u, 7u,
5u, 8u, 4u, 8u, 4u, 8u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u,
4u, 8u, 5u, 8u, 8u, 8u, 1u, 18u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u,
5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 1u, 15u, 3u, 9u,
4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u,
3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u,
5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u,
4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u, 4u, 8u,
3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u,
5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u,
5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 4u, 12u, 4u, 8u, 3u, 16u,
3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u,
1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u,
5u, 9u, 3u, 9u, 5u, 9u, 1u, 16u, 3u, 16u, 1u, 16u, 4u, 12u, 5u, 9u,
9u, 9u, 5u, 9u, 1u, 15u, 3u, 9u, 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u,
1u, 15u, 0
};
static const char _indic_syllable_machine_key_spans[] = {
1, 5, 3, 4, 5, 1, 1, 5,
9, 5, 1, 3, 4, 5, 1, 1,
5, 9, 9, 9, 1, 3, 4, 5,
1, 1, 5, 5, 9, 1, 3, 4,
5, 1, 1, 5, 5, 4, 1, 18,
14, 14, 13, 15, 5, 5, 1, 5,
15, 15, 15, 7, 6, 5, 6, 5,
7, 5, 14, 14, 14, 14, 13, 15,
14, 14, 13, 15, 5, 1, 5, 15,
15, 7, 6, 5, 6, 5, 5, 7,
5, 14, 14, 5, 14, 14, 13, 15,
14, 15, 5, 1, 5, 15, 15, 7,
6, 5, 14, 6, 5, 5, 7, 5,
14, 9, 5, 14, 14, 13, 15, 14,
15, 5, 1, 5, 15, 15, 7, 6,
5, 14, 6, 5, 5, 7, 5, 16,
14, 16, 9, 5, 1, 5, 15, 7,
5, 5, 1, 5, 15
1, 5, 3, 4, 5, 9, 5, 1,
3, 4, 5, 9, 9, 9, 1, 3,
4, 5, 5, 9, 1, 3, 4, 5,
5, 4, 1, 18, 14, 14, 13, 15,
5, 5, 1, 5, 15, 15, 15, 7,
6, 5, 6, 5, 7, 5, 14, 14,
14, 14, 13, 15, 14, 14, 13, 15,
5, 1, 5, 15, 15, 7, 6, 5,
6, 5, 5, 7, 5, 14, 14, 5,
14, 14, 13, 15, 14, 15, 5, 1,
5, 15, 15, 7, 6, 5, 14, 6,
5, 5, 7, 5, 14, 9, 5, 14,
14, 13, 15, 14, 15, 5, 1, 5,
15, 15, 7, 6, 5, 14, 6, 5,
5, 7, 5, 16, 14, 16, 9, 5,
1, 5, 15, 7, 5, 5, 1, 5,
15
};
static const short _indic_syllable_machine_index_offsets[] = {
0, 2, 8, 12, 17, 23, 25, 27,
33, 43, 49, 51, 55, 60, 66, 68,
70, 76, 86, 96, 106, 108, 112, 117,
123, 125, 127, 133, 139, 149, 151, 155,
160, 166, 168, 170, 176, 182, 187, 189,
208, 223, 238, 252, 268, 274, 280, 282,
288, 304, 320, 336, 344, 351, 357, 364,
370, 378, 384, 399, 414, 429, 444, 458,
474, 489, 504, 518, 534, 540, 542, 548,
564, 580, 588, 595, 601, 608, 614, 620,
628, 634, 649, 664, 670, 685, 700, 714,
730, 745, 761, 767, 769, 775, 791, 807,
815, 822, 828, 843, 850, 856, 862, 870,
876, 891, 901, 907, 922, 937, 951, 967,
982, 998, 1004, 1006, 1012, 1028, 1044, 1052,
1059, 1065, 1080, 1087, 1093, 1099, 1107, 1113,
1130, 1145, 1162, 1172, 1178, 1180, 1186, 1202,
1210, 1216, 1222, 1224, 1230
0, 2, 8, 12, 17, 23, 33, 39,
41, 45, 50, 56, 66, 76, 86, 88,
92, 97, 103, 109, 119, 121, 125, 130,
136, 142, 147, 149, 168, 183, 198, 212,
228, 234, 240, 242, 248, 264, 280, 296,
304, 311, 317, 324, 330, 338, 344, 359,
374, 389, 404, 418, 434, 449, 464, 478,
494, 500, 502, 508, 524, 540, 548, 555,
561, 568, 574, 580, 588, 594, 609, 624,
630, 645, 660, 674, 690, 705, 721, 727,
729, 735, 751, 767, 775, 782, 788, 803,
810, 816, 822, 830, 836, 851, 861, 867,
882, 897, 911, 927, 942, 958, 964, 966,
972, 988, 1004, 1012, 1019, 1025, 1040, 1047,
1053, 1059, 1067, 1073, 1090, 1105, 1122, 1132,
1138, 1140, 1146, 1162, 1170, 1176, 1182, 1184,
1190
};
static const unsigned char _indic_syllable_machine_indicies[] = {
1, 0, 2, 3, 3, 4, 1, 0,
3, 3, 4, 0, 3, 3, 4, 1,
0, 5, 3, 3, 4, 1, 0, 6,
0, 7, 0, 8, 3, 3, 4, 1,
0, 2, 3, 3, 4, 1, 0, 0,
0, 9, 0, 11, 12, 12, 13, 14,
10, 14, 10, 12, 12, 13, 10, 12,
12, 13, 14, 10, 15, 12, 12, 13,
14, 10, 16, 10, 17, 10, 18, 12,
12, 13, 14, 10, 11, 12, 12, 13,
14, 10, 10, 10, 19, 10, 11, 12,
12, 13, 14, 10, 10, 10, 20, 10,
22, 23, 23, 24, 25, 21, 21, 21,
26, 21, 25, 21, 23, 23, 24, 27,
23, 23, 24, 25, 21, 28, 23, 23,
24, 25, 21, 29, 21, 30, 21, 22,
23, 23, 24, 25, 21, 31, 23, 23,
24, 25, 21, 33, 34, 34, 35, 36,
32, 32, 32, 37, 32, 36, 32, 34,
34, 35, 32, 34, 34, 35, 36, 32,
38, 34, 34, 35, 36, 32, 39, 32,
40, 32, 33, 34, 34, 35, 36, 32,
41, 34, 34, 35, 36, 32, 23, 23,
24, 1, 0, 43, 42, 45, 46, 47,
48, 49, 50, 24, 25, 51, 52, 52,
26, 44, 53, 54, 55, 56, 57, 44,
59, 60, 61, 62, 4, 1, 63, 58,
58, 9, 58, 58, 58, 64, 58, 65,
60, 66, 66, 4, 1, 63, 58, 58,
58, 58, 58, 58, 64, 58, 60, 66,
66, 4, 1, 63, 58, 58, 58, 58,
58, 58, 64, 58, 45, 58, 58, 58,
67, 68, 58, 1, 63, 58, 58, 58,
58, 58, 45, 58, 69, 69, 58, 1,
63, 58, 63, 58, 58, 70, 63, 58,
63, 58, 63, 58, 58, 58, 63, 58,
45, 58, 71, 58, 69, 69, 58, 1,
63, 58, 58, 58, 58, 58, 45, 58,
45, 58, 58, 58, 69, 69, 58, 1,
63, 58, 58, 58, 58, 58, 45, 58,
45, 58, 58, 58, 69, 68, 58, 1,
63, 58, 58, 58, 58, 58, 45, 58,
72, 7, 73, 74, 4, 1, 63, 58,
7, 73, 74, 4, 1, 63, 58, 73,
73, 4, 1, 63, 58, 75, 76, 76,
4, 1, 63, 58, 67, 77, 58, 1,
63, 58, 67, 58, 69, 69, 58, 1,
63, 58, 69, 77, 58, 1, 63, 58,
59, 60, 66, 66, 4, 1, 63, 58,
58, 58, 58, 58, 58, 64, 58, 59,
60, 61, 66, 4, 1, 63, 58, 58,
9, 58, 58, 58, 64, 58, 79, 80,
81, 82, 13, 14, 83, 78, 78, 20,
78, 78, 78, 84, 78, 85, 80, 86,
82, 13, 14, 83, 78, 78, 78, 78,
78, 78, 84, 78, 80, 86, 82, 13,
14, 83, 78, 78, 78, 78, 78, 78,
84, 78, 87, 78, 78, 78, 88, 89,
78, 14, 83, 78, 78, 78, 78, 78,
87, 78, 90, 80, 91, 92, 13, 14,
83, 78, 78, 19, 78, 78, 78, 84,
78, 93, 80, 86, 86, 13, 14, 83,
78, 78, 78, 78, 78, 78, 84, 78,
80, 86, 86, 13, 14, 83, 78, 78,
78, 78, 78, 78, 84, 78, 87, 78,
78, 78, 94, 89, 78, 14, 83, 78,
78, 78, 78, 78, 87, 78, 83, 78,
78, 95, 83, 78, 83, 78, 83, 78,
78, 78, 83, 78, 87, 78, 96, 78,
94, 94, 78, 14, 83, 78, 78, 78,
78, 78, 87, 78, 87, 78, 78, 78,
94, 94, 78, 14, 83, 78, 78, 78,
78, 78, 87, 78, 97, 17, 98, 99,
13, 14, 83, 78, 17, 98, 99, 13,
14, 83, 78, 98, 98, 13, 14, 83,
78, 100, 101, 101, 13, 14, 83, 78,
88, 102, 78, 14, 83, 78, 94, 94,
78, 14, 83, 78, 88, 78, 94, 94,
78, 14, 83, 78, 94, 102, 78, 14,
83, 78, 90, 80, 86, 86, 13, 14,
83, 78, 78, 78, 78, 78, 78, 84,
78, 90, 80, 91, 86, 13, 14, 83,
78, 78, 19, 78, 78, 78, 84, 78,
11, 12, 12, 13, 14, 78, 79, 80,
86, 82, 13, 14, 83, 78, 78, 78,
78, 78, 78, 84, 78, 104, 48, 105,
105, 24, 25, 51, 103, 103, 103, 103,
103, 103, 55, 103, 48, 105, 105, 24,
25, 51, 103, 103, 103, 103, 103, 103,
55, 103, 106, 103, 103, 103, 107, 108,
103, 25, 51, 103, 103, 103, 103, 103,
106, 103, 47, 48, 109, 110, 24, 25,
51, 103, 103, 26, 103, 103, 103, 55,
103, 106, 103, 103, 103, 111, 108, 103,
25, 51, 103, 103, 103, 103, 103, 106,
103, 51, 103, 103, 112, 51, 103, 51,
103, 51, 103, 103, 103, 51, 103, 106,
103, 113, 103, 111, 111, 103, 25, 51,
103, 103, 103, 103, 103, 106, 103, 106,
103, 103, 103, 111, 111, 103, 25, 51,
103, 103, 103, 103, 103, 106, 103, 114,
30, 115, 116, 24, 25, 51, 103, 30,
115, 116, 24, 25, 51, 103, 115, 115,
24, 25, 51, 103, 47, 48, 105, 105,
24, 25, 51, 103, 103, 103, 103, 103,
103, 55, 103, 117, 118, 118, 24, 25,
51, 103, 107, 119, 103, 25, 51, 103,
111, 111, 103, 25, 51, 103, 107, 103,
111, 111, 103, 25, 51, 103, 111, 119,
103, 25, 51, 103, 47, 48, 109, 105,
24, 25, 51, 103, 103, 26, 103, 103,
103, 55, 103, 22, 23, 23, 24, 25,
120, 120, 120, 26, 120, 22, 23, 23,
24, 25, 120, 122, 123, 124, 125, 35,
36, 126, 121, 121, 37, 121, 121, 121,
127, 121, 128, 123, 125, 125, 35, 36,
126, 121, 121, 121, 121, 121, 121, 127,
121, 123, 125, 125, 35, 36, 126, 121,
121, 121, 121, 121, 121, 127, 121, 129,
121, 121, 121, 130, 131, 121, 36, 126,
121, 121, 121, 121, 121, 129, 121, 122,
123, 124, 52, 35, 36, 126, 121, 121,
37, 121, 121, 121, 127, 121, 129, 121,
121, 121, 132, 131, 121, 36, 126, 121,
121, 121, 121, 121, 129, 121, 126, 121,
121, 133, 126, 121, 126, 121, 126, 121,
121, 121, 126, 121, 129, 121, 134, 121,
132, 132, 121, 36, 126, 121, 121, 121,
121, 121, 129, 121, 129, 121, 121, 121,
132, 132, 121, 36, 126, 121, 121, 121,
121, 121, 129, 121, 135, 40, 136, 137,
35, 36, 126, 121, 40, 136, 137, 35,
36, 126, 121, 136, 136, 35, 36, 126,
121, 122, 123, 125, 125, 35, 36, 126,
121, 121, 121, 121, 121, 121, 127, 121,
138, 139, 139, 35, 36, 126, 121, 130,
140, 121, 36, 126, 121, 132, 132, 121,
36, 126, 121, 130, 121, 132, 132, 121,
36, 126, 121, 132, 140, 121, 36, 126,
121, 45, 46, 47, 48, 109, 105, 24,
25, 51, 52, 52, 26, 103, 103, 45,
55, 103, 59, 141, 61, 62, 4, 1,
63, 58, 58, 9, 58, 58, 58, 64,
58, 45, 46, 47, 48, 142, 143, 24,
144, 145, 58, 52, 26, 58, 58, 45,
55, 58, 22, 146, 146, 24, 144, 63,
58, 58, 26, 58, 145, 58, 58, 147,
145, 58, 145, 58, 145, 58, 58, 58,
145, 58, 45, 58, 71, 22, 146, 146,
24, 144, 63, 58, 58, 58, 58, 58,
45, 58, 149, 148, 150, 150, 148, 43,
151, 148, 150, 150, 148, 43, 151, 148,
151, 148, 148, 152, 151, 148, 151, 148,
151, 148, 148, 148, 151, 148, 45, 120,
120, 120, 120, 120, 120, 120, 120, 52,
120, 120, 120, 120, 45, 120, 0
0, 5, 3, 3, 4, 1, 0, 2,
3, 3, 4, 1, 0, 0, 0, 6,
0, 8, 9, 9, 10, 11, 7, 11,
7, 9, 9, 10, 7, 9, 9, 10,
11, 7, 12, 9, 9, 10, 11, 7,
8, 9, 9, 10, 11, 7, 7, 7,
13, 7, 8, 9, 9, 10, 11, 7,
7, 7, 14, 7, 16, 17, 17, 18,
19, 15, 15, 15, 20, 15, 19, 15,
17, 17, 18, 21, 17, 17, 18, 19,
15, 16, 17, 17, 18, 19, 15, 22,
17, 17, 18, 19, 15, 24, 25, 25,
26, 27, 23, 23, 23, 28, 23, 27,
23, 25, 25, 26, 23, 25, 25, 26,
27, 23, 24, 25, 25, 26, 27, 23,
29, 25, 25, 26, 27, 23, 17, 17,
18, 1, 0, 31, 30, 33, 34, 35,
36, 37, 38, 18, 19, 39, 40, 40,
20, 32, 41, 42, 43, 44, 45, 32,
47, 48, 49, 50, 4, 1, 51, 46,
46, 6, 46, 46, 46, 52, 46, 53,
48, 54, 54, 4, 1, 51, 46, 46,
46, 46, 46, 46, 52, 46, 48, 54,
54, 4, 1, 51, 46, 46, 46, 46,
46, 46, 52, 46, 33, 46, 46, 46,
55, 56, 46, 1, 51, 46, 46, 46,
46, 46, 33, 46, 57, 57, 46, 1,
51, 46, 51, 46, 46, 58, 51, 46,
51, 46, 51, 46, 46, 46, 51, 46,
33, 46, 59, 46, 57, 57, 46, 1,
51, 46, 46, 46, 46, 46, 33, 46,
33, 46, 46, 46, 57, 57, 46, 1,
51, 46, 46, 46, 46, 46, 33, 46,
33, 46, 46, 46, 57, 56, 46, 1,
51, 46, 46, 46, 46, 46, 33, 46,
60, 61, 62, 62, 4, 1, 51, 46,
61, 62, 62, 4, 1, 51, 46, 62,
62, 4, 1, 51, 46, 63, 64, 64,
4, 1, 51, 46, 55, 65, 46, 1,
51, 46, 55, 46, 57, 57, 46, 1,
51, 46, 57, 65, 46, 1, 51, 46,
47, 48, 54, 54, 4, 1, 51, 46,
46, 46, 46, 46, 46, 52, 46, 47,
48, 49, 54, 4, 1, 51, 46, 46,
6, 46, 46, 46, 52, 46, 67, 68,
69, 70, 10, 11, 71, 66, 66, 14,
66, 66, 66, 72, 66, 73, 68, 74,
70, 10, 11, 71, 66, 66, 66, 66,
66, 66, 72, 66, 68, 74, 70, 10,
11, 71, 66, 66, 66, 66, 66, 66,
72, 66, 75, 66, 66, 66, 76, 77,
66, 11, 71, 66, 66, 66, 66, 66,
75, 66, 78, 68, 79, 80, 10, 11,
71, 66, 66, 13, 66, 66, 66, 72,
66, 81, 68, 74, 74, 10, 11, 71,
66, 66, 66, 66, 66, 66, 72, 66,
68, 74, 74, 10, 11, 71, 66, 66,
66, 66, 66, 66, 72, 66, 75, 66,
66, 66, 82, 77, 66, 11, 71, 66,
66, 66, 66, 66, 75, 66, 71, 66,
66, 83, 71, 66, 71, 66, 71, 66,
66, 66, 71, 66, 75, 66, 84, 66,
82, 82, 66, 11, 71, 66, 66, 66,
66, 66, 75, 66, 75, 66, 66, 66,
82, 82, 66, 11, 71, 66, 66, 66,
66, 66, 75, 66, 85, 86, 87, 87,
10, 11, 71, 66, 86, 87, 87, 10,
11, 71, 66, 87, 87, 10, 11, 71,
66, 88, 89, 89, 10, 11, 71, 66,
76, 90, 66, 11, 71, 66, 82, 82,
66, 11, 71, 66, 76, 66, 82, 82,
66, 11, 71, 66, 82, 90, 66, 11,
71, 66, 78, 68, 74, 74, 10, 11,
71, 66, 66, 66, 66, 66, 66, 72,
66, 78, 68, 79, 74, 10, 11, 71,
66, 66, 13, 66, 66, 66, 72, 66,
8, 9, 9, 10, 11, 66, 67, 68,
74, 70, 10, 11, 71, 66, 66, 66,
66, 66, 66, 72, 66, 92, 36, 93,
93, 18, 19, 39, 91, 91, 91, 91,
91, 91, 43, 91, 36, 93, 93, 18,
19, 39, 91, 91, 91, 91, 91, 91,
43, 91, 94, 91, 91, 91, 95, 96,
91, 19, 39, 91, 91, 91, 91, 91,
94, 91, 35, 36, 97, 98, 18, 19,
39, 91, 91, 20, 91, 91, 91, 43,
91, 94, 91, 91, 91, 99, 96, 91,
19, 39, 91, 91, 91, 91, 91, 94,
91, 39, 91, 91, 100, 39, 91, 39,
91, 39, 91, 91, 91, 39, 91, 94,
91, 101, 91, 99, 99, 91, 19, 39,
91, 91, 91, 91, 91, 94, 91, 94,
91, 91, 91, 99, 99, 91, 19, 39,
91, 91, 91, 91, 91, 94, 91, 102,
103, 104, 104, 18, 19, 39, 91, 103,
104, 104, 18, 19, 39, 91, 104, 104,
18, 19, 39, 91, 35, 36, 93, 93,
18, 19, 39, 91, 91, 91, 91, 91,
91, 43, 91, 105, 106, 106, 18, 19,
39, 91, 95, 107, 91, 19, 39, 91,
99, 99, 91, 19, 39, 91, 95, 91,
99, 99, 91, 19, 39, 91, 99, 107,
91, 19, 39, 91, 35, 36, 97, 93,
18, 19, 39, 91, 91, 20, 91, 91,
91, 43, 91, 16, 17, 17, 18, 19,
108, 108, 108, 20, 108, 16, 17, 17,
18, 19, 108, 110, 111, 112, 113, 26,
27, 114, 109, 109, 28, 109, 109, 109,
115, 109, 116, 111, 113, 113, 26, 27,
114, 109, 109, 109, 109, 109, 109, 115,
109, 111, 113, 113, 26, 27, 114, 109,
109, 109, 109, 109, 109, 115, 109, 117,
109, 109, 109, 118, 119, 109, 27, 114,
109, 109, 109, 109, 109, 117, 109, 110,
111, 112, 40, 26, 27, 114, 109, 109,
28, 109, 109, 109, 115, 109, 117, 109,
109, 109, 120, 119, 109, 27, 114, 109,
109, 109, 109, 109, 117, 109, 114, 109,
109, 121, 114, 109, 114, 109, 114, 109,
109, 109, 114, 109, 117, 109, 122, 109,
120, 120, 109, 27, 114, 109, 109, 109,
109, 109, 117, 109, 117, 109, 109, 109,
120, 120, 109, 27, 114, 109, 109, 109,
109, 109, 117, 109, 123, 124, 125, 125,
26, 27, 114, 109, 124, 125, 125, 26,
27, 114, 109, 125, 125, 26, 27, 114,
109, 110, 111, 113, 113, 26, 27, 114,
109, 109, 109, 109, 109, 109, 115, 109,
126, 127, 127, 26, 27, 114, 109, 118,
128, 109, 27, 114, 109, 120, 120, 109,
27, 114, 109, 118, 109, 120, 120, 109,
27, 114, 109, 120, 128, 109, 27, 114,
109, 33, 34, 35, 36, 97, 93, 18,
19, 39, 40, 40, 20, 91, 91, 33,
43, 91, 47, 129, 49, 50, 4, 1,
51, 46, 46, 6, 46, 46, 46, 52,
46, 33, 34, 35, 36, 130, 131, 18,
132, 133, 46, 40, 20, 46, 46, 33,
43, 46, 16, 134, 134, 18, 132, 51,
46, 46, 20, 46, 133, 46, 46, 135,
133, 46, 133, 46, 133, 46, 46, 46,
133, 46, 33, 46, 59, 16, 134, 134,
18, 132, 51, 46, 46, 46, 46, 46,
33, 46, 137, 136, 138, 138, 136, 31,
139, 136, 138, 138, 136, 31, 139, 136,
139, 136, 136, 140, 139, 136, 139, 136,
139, 136, 136, 136, 139, 136, 33, 108,
108, 108, 108, 108, 108, 108, 108, 40,
108, 108, 108, 108, 33, 108, 0
};
static const unsigned char _indic_syllable_machine_trans_targs[] = {
39, 45, 50, 2, 51, 5, 6, 53,
57, 58, 39, 67, 11, 73, 68, 14,
15, 75, 80, 81, 84, 39, 89, 21,
95, 90, 98, 39, 24, 25, 97, 103,
39, 112, 30, 118, 113, 121, 33, 34,
120, 126, 39, 137, 39, 40, 60, 85,
87, 105, 106, 91, 107, 127, 128, 99,
135, 140, 39, 41, 43, 8, 59, 46,
54, 42, 1, 44, 48, 0, 47, 49,
52, 3, 4, 55, 7, 56, 39, 61,
63, 18, 83, 69, 76, 62, 9, 64,
78, 71, 65, 17, 82, 66, 10, 70,
72, 74, 12, 13, 77, 16, 79, 39,
86, 26, 88, 101, 93, 19, 104, 20,
92, 94, 96, 22, 23, 100, 27, 102,
39, 39, 108, 110, 28, 35, 114, 122,
109, 111, 124, 116, 29, 115, 117, 119,
31, 32, 123, 36, 125, 129, 130, 134,
131, 132, 37, 133, 39, 136, 38, 138,
139
27, 33, 38, 2, 39, 45, 46, 27,
55, 8, 61, 56, 68, 69, 72, 27,
77, 15, 83, 78, 86, 27, 91, 27,
100, 21, 106, 101, 109, 114, 27, 125,
27, 28, 48, 73, 75, 93, 94, 79,
95, 115, 116, 87, 123, 128, 27, 29,
31, 5, 47, 34, 42, 30, 1, 32,
36, 0, 35, 37, 40, 41, 3, 43,
4, 44, 27, 49, 51, 12, 71, 57,
64, 50, 6, 52, 66, 59, 53, 11,
70, 54, 7, 58, 60, 62, 63, 9,
65, 10, 67, 27, 74, 17, 76, 89,
81, 13, 92, 14, 80, 82, 84, 85,
16, 88, 18, 90, 27, 27, 96, 98,
19, 23, 102, 110, 97, 99, 112, 104,
20, 103, 105, 107, 108, 22, 111, 24,
113, 117, 118, 122, 119, 120, 25, 121,
27, 124, 26, 126, 127
};
static const char _indic_syllable_machine_trans_actions[] = {
1, 0, 2, 0, 2, 0, 0, 2,
2, 2, 3, 2, 0, 2, 0, 0,
0, 2, 2, 2, 2, 4, 2, 0,
5, 0, 5, 6, 0, 0, 5, 2,
7, 2, 0, 2, 0, 2, 0, 0,
2, 2, 8, 0, 11, 2, 2, 5,
0, 12, 12, 0, 2, 5, 2, 5,
2, 0, 13, 2, 0, 0, 2, 0,
2, 2, 0, 2, 2, 0, 0, 2,
2, 0, 0, 0, 0, 2, 14, 2,
1, 0, 2, 0, 2, 2, 2, 3,
2, 0, 2, 0, 2, 2, 2, 4,
2, 0, 5, 0, 5, 6, 2, 7,
2, 0, 2, 0, 2, 2, 8, 0,
11, 2, 2, 5, 0, 12, 12, 0,
2, 5, 2, 5, 2, 0, 13, 2,
0, 0, 2, 0, 2, 2, 0, 2,
2, 2, 2, 0, 2, 2, 0, 0,
2, 2, 0, 0, 0, 0, 2, 15,
5, 0, 5, 2, 2, 0, 5, 0,
0, 2, 5, 0, 0, 0, 0, 2,
16, 17, 2, 0, 0, 0, 0, 2,
2, 2, 2, 2, 0, 0, 2, 2,
0, 0, 0, 0, 2, 0, 18, 18,
0, 0, 0, 0, 19, 2, 0, 0,
0
2, 0, 0, 2, 2, 2, 0, 0,
0, 2, 14, 2, 0, 0, 2, 0,
2, 2, 0, 2, 2, 2, 2, 0,
2, 2, 0, 0, 2, 2, 2, 0,
0, 0, 2, 15, 5, 0, 5, 2,
2, 0, 5, 0, 0, 2, 5, 5,
0, 0, 0, 2, 16, 17, 2, 0,
0, 0, 0, 2, 2, 2, 2, 2,
0, 0, 2, 2, 2, 0, 0, 0,
2, 0, 18, 18, 0, 0, 0, 0,
19, 2, 0, 0, 0
};
static const char _indic_syllable_machine_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 9,
0, 0, 0, 9, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -362,15 +349,14 @@ static const char _indic_syllable_machine_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0
0
};
static const char _indic_syllable_machine_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 10,
0, 0, 0, 10, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@ -383,42 +369,41 @@ static const char _indic_syllable_machine_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0
0
};
static const short _indic_syllable_machine_eof_trans[] = {
1, 1, 1, 1, 1, 1, 1, 1,
1, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 22, 22, 28, 22, 22,
22, 22, 22, 22, 33, 33, 33, 33,
33, 33, 33, 33, 33, 1, 43, 0,
59, 59, 59, 59, 59, 59, 59, 59,
59, 59, 59, 59, 59, 59, 59, 59,
59, 59, 59, 59, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 79, 79, 79,
79, 79, 79, 79, 79, 104, 104, 104,
104, 104, 104, 104, 104, 104, 104, 104,
104, 104, 104, 104, 104, 104, 104, 104,
104, 121, 121, 122, 122, 122, 122, 122,
122, 122, 122, 122, 122, 122, 122, 122,
122, 122, 122, 122, 122, 122, 122, 104,
59, 59, 59, 59, 59, 59, 59, 149,
149, 149, 149, 149, 121
1, 1, 1, 1, 1, 1, 8, 8,
8, 8, 8, 8, 8, 16, 16, 22,
16, 16, 16, 24, 24, 24, 24, 24,
24, 1, 31, 0, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47,
47, 47, 47, 47, 47, 47, 47, 47,
67, 67, 67, 67, 67, 67, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
67, 92, 92, 92, 92, 92, 92, 92,
92, 92, 92, 92, 92, 92, 92, 92,
92, 92, 92, 92, 92, 109, 109, 110,
110, 110, 110, 110, 110, 110, 110, 110,
110, 110, 110, 110, 110, 110, 110, 110,
110, 110, 110, 92, 47, 47, 47, 47,
47, 47, 47, 137, 137, 137, 137, 137,
109
};
static const int indic_syllable_machine_start = 39;
static const int indic_syllable_machine_first_final = 39;
static const int indic_syllable_machine_start = 27;
static const int indic_syllable_machine_first_final = 27;
static const int indic_syllable_machine_error = -1;
static const int indic_syllable_machine_en_main = 39;
static const int indic_syllable_machine_en_main = 27;
#line 58 "hb-ot-shaper-indic-machine.rl"
#line 118 "hb-ot-shaper-indic-machine.rl"
#line 117 "hb-ot-shaper-indic-machine.rl"
#define found_syllable(syllable_type) \
@ -437,7 +422,7 @@ find_syllables_indic (hb_buffer_t *buffer)
int cs;
hb_glyph_info_t *info = buffer->info;
#line 441 "hb-ot-shaper-indic-machine.hh"
#line 426 "hb-ot-shaper-indic-machine.hh"
{
cs = indic_syllable_machine_start;
ts = 0;
@ -445,7 +430,7 @@ find_syllables_indic (hb_buffer_t *buffer)
act = 0;
}
#line 138 "hb-ot-shaper-indic-machine.rl"
#line 137 "hb-ot-shaper-indic-machine.rl"
p = 0;
@ -453,7 +438,7 @@ find_syllables_indic (hb_buffer_t *buffer)
unsigned int syllable_serial = 1;
#line 457 "hb-ot-shaper-indic-machine.hh"
#line 442 "hb-ot-shaper-indic-machine.hh"
{
int _slen;
int _trans;
@ -467,7 +452,7 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
#line 471 "hb-ot-shaper-indic-machine.hh"
#line 456 "hb-ot-shaper-indic-machine.hh"
}
_keys = _indic_syllable_machine_trans_keys + (cs<<1);
@ -490,51 +475,51 @@ _eof_trans:
{te = p+1;}
break;
case 11:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{te = p+1;{ found_syllable (indic_non_indic_cluster); }}
break;
case 13:
#line 109 "hb-ot-shaper-indic-machine.rl"
#line 108 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_consonant_syllable); }}
break;
case 14:
#line 110 "hb-ot-shaper-indic-machine.rl"
#line 109 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_vowel_syllable); }}
break;
case 17:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 110 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_standalone_cluster); }}
break;
case 19:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_symbol_cluster); }}
break;
case 15:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 16:
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{te = p;p--;{ found_syllable (indic_non_indic_cluster); }}
break;
case 1:
#line 109 "hb-ot-shaper-indic-machine.rl"
#line 108 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }}
break;
case 3:
#line 110 "hb-ot-shaper-indic-machine.rl"
#line 109 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }}
break;
case 7:
#line 111 "hb-ot-shaper-indic-machine.rl"
#line 110 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }}
break;
case 8:
#line 112 "hb-ot-shaper-indic-machine.rl"
#line 111 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }}
break;
case 4:
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }}
break;
case 6:
@ -555,22 +540,22 @@ _eof_trans:
case 18:
#line 1 "NONE"
{te = p+1;}
#line 109 "hb-ot-shaper-indic-machine.rl"
#line 108 "hb-ot-shaper-indic-machine.rl"
{act = 1;}
break;
case 5:
#line 1 "NONE"
{te = p+1;}
#line 113 "hb-ot-shaper-indic-machine.rl"
#line 112 "hb-ot-shaper-indic-machine.rl"
{act = 5;}
break;
case 12:
#line 1 "NONE"
{te = p+1;}
#line 114 "hb-ot-shaper-indic-machine.rl"
#line 113 "hb-ot-shaper-indic-machine.rl"
{act = 6;}
break;
#line 574 "hb-ot-shaper-indic-machine.hh"
#line 559 "hb-ot-shaper-indic-machine.hh"
}
_again:
@ -579,7 +564,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
#line 583 "hb-ot-shaper-indic-machine.hh"
#line 568 "hb-ot-shaper-indic-machine.hh"
}
if ( ++p != pe )
@ -595,7 +580,7 @@ _again:
}
#line 146 "hb-ot-shaper-indic-machine.rl"
#line 145 "hb-ot-shaper-indic-machine.rl"
}

View File

@ -87,9 +87,8 @@ z = ZWJ|ZWNJ; # is_joiner
reph = (Ra H | Repha); # possible reph
cn = c.ZWJ?.n?;
forced_rakar = ZWJ H ZWJ Ra;
symbol = Symbol.N?;
matra_group = z*.M.N?.(H | forced_rakar)?;
matra_group = z*.M.N?.H?;
syllable_tail = (z?.SM.SM?.ZWNJ?)? (A | VD)*;
halant_group = (z?.H.(ZWJ.N?)?);
final_halant_group = halant_group | H.ZWNJ;

View File

@ -75,26 +75,26 @@ static_assert (OT_VPst == M_Cat(VPst), "");
#define _OT_A OT_A /* 53 chars; A */
#define _OT_As OT_As /* 1 chars; As */
#define _OT_C OT_C /* 518 chars; C */
#define _OT_C OT_C /* 478 chars; C */
#define _OT_CM OT_CM /* 1 chars; CM */
#define _OT_CS OT_CS /* 2 chars; CS */
#define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */
#define _OT_H OT_H /* 12 chars; H */
#define _OT_M OT_M /* 160 chars; M */
#define _OT_H OT_H /* 11 chars; H */
#define _OT_M OT_M /* 143 chars; M */
#define _OT_MH OT_MH /* 1 chars; MH */
#define _OT_ML OT_ML /* 1 chars; ML */
#define _OT_MR OT_MR /* 1 chars; MR */
#define _OT_MW OT_MW /* 2 chars; MW */
#define _OT_MY OT_MY /* 3 chars; MY */
#define _OT_N OT_N /* 17 chars; N */
#define _OT_GB OT_PLACEHOLDER /* 175 chars; PLACEHOLDER */
#define _OT_GB OT_PLACEHOLDER /* 165 chars; PLACEHOLDER */
#define _OT_PT OT_PT /* 8 chars; PT */
#define _OT_R OT_Ra /* 15 chars; Ra */
#define _OT_R OT_Ra /* 14 chars; Ra */
#define _OT_Rf OT_Repha /* 1 chars; Repha */
#define _OT_Rt OT_Robatic /* 3 chars; Robatic */
#define _OT_SM OT_SM /* 58 chars; SM */
#define _OT_SM OT_SM /* 55 chars; SM */
#define _OT_S OT_Symbol /* 22 chars; Symbol */
#define _OT_V OT_V /* 190 chars; V */
#define _OT_V OT_V /* 172 chars; V */
#define _OT_VA OT_VAbv /* 18 chars; VAbv */
#define _OT_VB OT_VBlw /* 7 chars; VBlw */
#define _OT_VL OT_VPre /* 5 chars; VPre */
@ -106,18 +106,18 @@ static_assert (OT_VPst == M_Cat(VPst), "");
#define _OT_ZWJ OT_ZWJ /* 1 chars; ZWJ */
#define _OT_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */
#define _POS_T POS_ABOVE_C /* 23 chars; ABOVE_C */
#define _POS_T POS_ABOVE_C /* 22 chars; ABOVE_C */
#define _POS_A POS_AFTER_MAIN /* 3 chars; AFTER_MAIN */
#define _POS_AP POS_AFTER_POST /* 50 chars; AFTER_POST */
#define _POS_AS POS_AFTER_SUB /* 60 chars; AFTER_SUB */
#define _POS_C POS_BASE_C /* 902 chars; BASE_C */
#define _POS_AS POS_AFTER_SUB /* 45 chars; AFTER_SUB */
#define _POS_C POS_BASE_C /* 833 chars; BASE_C */
#define _POS_BS POS_BEFORE_SUB /* 31 chars; BEFORE_SUB */
#define _POS_B POS_BELOW_C /* 13 chars; BELOW_C */
#define _POS_X POS_END /* 71 chars; END */
#define _POS_R POS_POST_C /* 13 chars; POST_C */
#define _POS_L POS_PRE_C /* 5 chars; PRE_C */
#define _POS_LM POS_PRE_M /* 16 chars; PRE_M */
#define _POS_SM POS_SMVD /* 132 chars; SMVD */
#define _POS_LM POS_PRE_M /* 14 chars; PRE_M */
#define _POS_SM POS_SMVD /* 129 chars; SMVD */
#pragma GCC diagnostic pop
@ -323,25 +323,7 @@ static const uint16_t indic_table[] = {
/* 0D70 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 0D78 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C),
/* Sinhala */
/* 0D80 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C),
/* 0D88 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C),
/* 0D90 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X),
/* 0D98 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C),
/* 0DA0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C),
/* 0DA8 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C),
/* 0DB0 */ _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C),
/* 0DB8 */ _(C,C), _(C,C), _(C,C), _(R,C), _(X,X), _(C,C), _(X,X), _(X,X),
/* 0DC0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X),
/* 0DC8 */ _(X,X), _(X,X), _(H,T), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AS),
/* 0DD0 */ _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(M,AS), _(X,X),
/* 0DD8 */ _(M,AS), _(M,LM), _(M,AS), _(M,LM), _(M,AS), _(M,AS), _(M,AS), _(M,AS),
/* 0DE0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C),
/* 0DE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 0DF0 */ _(X,X), _(X,X), _(M,AS), _(M,AS), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x1000u 1336
#define indic_offset_0x1000u 1216
/* Myanmar */
@ -367,7 +349,7 @@ static const uint16_t indic_table[] = {
/* 1090 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 1098 */ _(GB,C), _(GB,C),_(SM,SM),_(SM,SM),_(SM,SM), _(VA,T), _(X,X), _(X,X),
#define indic_offset_0x1780u 1496
#define indic_offset_0x1780u 1376
/* Khmer */
@ -387,7 +369,7 @@ static const uint16_t indic_table[] = {
/* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x1cd0u 1608
#define indic_offset_0x1cd0u 1488
/* Vedic Extensions */
@ -399,7 +381,7 @@ static const uint16_t indic_table[] = {
/* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM),
/* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x2008u 1656
#define indic_offset_0x2008u 1536
/* General Punctuation */
@ -409,7 +391,7 @@ static const uint16_t indic_table[] = {
/* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x2070u 1688
#define indic_offset_0x2070u 1568
/* Superscripts and Subscripts */
@ -418,14 +400,14 @@ static const uint16_t indic_table[] = {
/* 2078 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 2080 */ _(X,X), _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x25f8u 1712
#define indic_offset_0x25f8u 1592
/* Geometric Shapes */
/* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X),
#define indic_offset_0xa8e0u 1720
#define indic_offset_0xa8e0u 1600
/* Devanagari Extended */
@ -435,7 +417,7 @@ static const uint16_t indic_table[] = {
/* A8F0 */ _(A,SM), _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM),
/* A8F8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(V,C), _(M,AS),
#define indic_offset_0xa9e0u 1752
#define indic_offset_0xa9e0u 1632
/* Myanmar Extended-B */
@ -445,7 +427,7 @@ static const uint16_t indic_table[] = {
/* A9F0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C),
/* A9F8 */ _(GB,C), _(GB,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X),
#define indic_offset_0xaa60u 1784
#define indic_offset_0xaa60u 1664
/* Myanmar Extended-A */
@ -455,7 +437,7 @@ static const uint16_t indic_table[] = {
/* AA70 */ _(X,X), _(C,C), _(C,C), _(C,C), _(GB,C), _(GB,C), _(GB,C), _(X,X),
/* AA78 */ _(X,X), _(X,X), _(C,C), _(PT,X), _(N,X), _(N,X), _(C,C), _(C,C),
#define indic_offset_0xfe00u 1816
#define indic_offset_0xfe00u 1696
/* Variation Selectors */
@ -463,7 +445,7 @@ static const uint16_t indic_table[] = {
/* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
/* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
#define indic_offset_0x11300u 1832
#define indic_offset_0x11300u 1712
/* Grantha */
@ -477,7 +459,7 @@ static const uint16_t indic_table[] = {
/* 11330 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 11338 */ _(X,X), _(X,X), _(X,X), _(N,X), _(N,X), _(X,X), _(X,X), _(X,X),
}; /* Table items: 1896; occupancy: 69% */
}; /* Table items: 1776; occupancy: 69% */
uint16_t
hb_indic_get_categories (hb_codepoint_t u)
@ -488,7 +470,7 @@ hb_indic_get_categories (hb_codepoint_t u)
if (unlikely (u == 0x00A0u)) return _(GB,C);
if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u];
if (hb_in_range<hb_codepoint_t> (u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u];
if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u];
if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0D7Fu)) return indic_table[u - 0x0900u + indic_offset_0x0900u];
break;
case 0x1u:

View File

@ -122,10 +122,6 @@ struct hb_indic_would_substitute_feature_t
* instead of adding a new flag in these structs.
*/
/* Selects which base-consonant search is used for a script; stored in the
 * base_pos field of indic_config_t and switched on during initial reordering. */
enum base_position_t {
/* Sinhala-specific search: its ZWJ behavior differs from main Indic and the
 * font is not consulted for consonant positions. */
BASE_POS_LAST_SINHALA,
/* Main Indic search: start from the end of the syllable and move backwards
 * until a suitable consonant is found. */
BASE_POS_LAST
};
enum reph_position_t {
REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
@ -147,7 +143,6 @@ struct indic_config_t
hb_script_t script;
bool has_old_spec;
hb_codepoint_t virama;
base_position_t base_pos;
reph_position_t reph_pos;
reph_mode_t reph_mode;
blwf_mode_t blwf_mode;
@ -156,18 +151,16 @@ struct indic_config_t
static const indic_config_t indic_configs[] =
{
/* Default. Should be first. */
{HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
{HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
{HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA,
REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_INVALID, false, 0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_BENGALI, true, 0x09CDu,REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_GURMUKHI, true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_GUJARATI, true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_ORIYA, true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_TAMIL, true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
{HB_SCRIPT_TELUGU, true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
{HB_SCRIPT_KANNADA, true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
{HB_SCRIPT_MALAYALAM, true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
};
@ -451,9 +444,6 @@ update_consonant_positions_indic (const hb_ot_shape_plan_t *plan,
{
const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data;
if (indic_plan->config->base_pos != BASE_POS_LAST)
return;
hb_codepoint_t virama;
if (indic_plan->load_virama_glyph (font, &virama))
{
@ -551,84 +541,51 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
has_reph = true;
}
switch (indic_plan->config->base_pos)
{
case BASE_POS_LAST:
{
/* -> starting from the end of the syllable, move backwards */
unsigned int i = end;
bool seen_below = false;
do {
i--;
/* -> until a consonant is found */
if (is_consonant (info[i]))
/* -> starting from the end of the syllable, move backwards */
unsigned int i = end;
bool seen_below = false;
do {
i--;
/* -> until a consonant is found */
if (is_consonant (info[i]))
{
/* -> that does not have a below-base or post-base form
* (post-base forms have to follow below-base forms), */
if (info[i].indic_position() != POS_BELOW_C &&
(info[i].indic_position() != POS_POST_C || seen_below))
{
/* -> that does not have a below-base or post-base form
* (post-base forms have to follow below-base forms), */
if (info[i].indic_position() != POS_BELOW_C &&
(info[i].indic_position() != POS_POST_C || seen_below))
{
base = i;
break;
}
if (info[i].indic_position() == POS_BELOW_C)
seen_below = true;
/* -> or that is not a pre-base-reordering Ra,
*
* IMPLEMENTATION NOTES:
*
* Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
* by the logic above already.
*/
/* -> or arrive at the first consonant. The consonant stopped at will
* be the base. */
base = i;
break;
}
else
{
/* A ZWJ after a Halant stops the base search, and requests an explicit
* half form.
* A ZWJ before a Halant, requests a subjoined form instead, and hence
* search continues. This is particularly important for Bengali
* sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
if (start < i &&
info[i].indic_category() == I_Cat(ZWJ) &&
info[i - 1].indic_category() == I_Cat(H))
break;
}
} while (i > limit);
}
break;
if (info[i].indic_position() == POS_BELOW_C)
seen_below = true;
case BASE_POS_LAST_SINHALA:
{
/* Sinhala base positioning is slightly different from main Indic, in that:
* 1. Its ZWJ behavior is different,
* 2. We don't need to look into the font for consonant positions.
*/
/* -> or that is not a pre-base-reordering Ra,
*
* IMPLEMENTATION NOTES:
*
* Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
* by the logic above already.
*/
if (!has_reph)
base = limit;
/* Find the last base consonant that is not blocked by ZWJ. If there is
* a ZWJ right before a base consonant, that would request a subjoined form. */
for (unsigned int i = limit; i < end; i++)
if (is_consonant (info[i]))
{
if (limit < i && info[i - 1].indic_category() == I_Cat(ZWJ))
break;
else
base = i;
}
/* Mark all subsequent consonants as below. */
for (unsigned int i = base + 1; i < end; i++)
if (is_consonant (info[i]))
info[i].indic_position() = POS_BELOW_C;
}
break;
/* -> or arrive at the first consonant. The consonant stopped at will
* be the base. */
base = i;
}
else
{
/* A ZWJ after a Halant stops the base search, and requests an explicit
* half form.
* A ZWJ before a Halant, requests a subjoined form instead, and hence
* search continues. This is particularly important for Bengali
* sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
if (start < i &&
info[i].indic_category() == I_Cat(ZWJ) &&
info[i - 1].indic_category() == I_Cat(H))
break;
}
} while (i > limit);
}
/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
@ -683,18 +640,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
if (base < end)
info[base].indic_position() = POS_BASE_C;
/* Mark final consonants. A final consonant is one appearing after a matra.
* Happens in Sinhala. */
for (unsigned int i = base + 1; i < end; i++)
if (info[i].indic_category() == I_Cat(M)) {
for (unsigned int j = i + 1; j < end; j++)
if (is_consonant (info[j])) {
info[j].indic_position() = POS_FINAL_C;
break;
}
break;
}
/* Handle beginning Ra */
if (has_reph)
info[start].indic_position() = POS_RA_TO_BECOME_REPH;
@ -761,12 +706,8 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
{
/*
* Uniscribe doesn't move the Halant with Left Matra.
* TEST: U+092B,U+093F,U+094DE
* We follow. This is important for the Sinhala
* U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
* where U+0DD9 is a left matra and U+0DCA is the virama.
* We don't want to move the virama with the left matra.
* TEST: U+0D9A,U+0DDA
* TEST: U+092B,U+093F,U+094D
* We follow.
*/
for (unsigned int j = i; j > start; j--)
if (info[j - 1].indic_position() != POS_PRE_M) {
@ -1486,11 +1427,10 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan,
switch ((hb_tag_t) plan->props.script)
{
case HB_SCRIPT_TAMIL:
case HB_SCRIPT_SINHALA:
break;
default:
/* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.
/* Uniscribe merges the entire syllable into a single cluster... Except for Tamil.
* This means, half forms are submerged into the main consonant's cluster.
* This is unnecessary, and makes cursor positioning harder, but that's what
* Uniscribe does. */
@ -1560,48 +1500,6 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c,
#endif
}
if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu)))
{
/*
* Sinhala split matras... Let the fun begin.
*
* These four characters have Unicode decompositions. However, Uniscribe
* decomposes them "Khmer-style", that is, it uses the character itself to
* get the second half. The first half of all four decompositions is always
* U+0DD9.
*
* Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
* broken with Uniscribe. But we need to support them. As such, we only
* do the Uniscribe-style decomposition if the character is transformed into
* its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
* Unicode decomposition.
*
* Note that we can't unconditionally use Unicode decomposition. That would
* break some other fonts, that are designed to work with Uniscribe, and
* don't have positioning features for the Unicode-style decomposition.
*
* Argh...
*
* The Uniscribe behavior is now documented in the newly published Sinhala
* spec in 2012:
*
* https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
*/
const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data;
hb_codepoint_t glyph;
if (indic_plan->uniscribe_bug_compatible ||
(c->font->get_nominal_glyph (ab, &glyph) &&
indic_plan->pstf.would_substitute (&glyph, 1, c->font->face)))
{
/* Ok, safe to use Uniscribe-style decomposition. */
*a = 0x0DD9u;
*b = ab;
return true;
}
}
return (bool) c->unicode->decompose (ab, a, b);
}

View File

@ -53,10 +53,9 @@ enum ot_position_t {
POS_POST_C = 11,
POS_AFTER_POST = 12,
POS_FINAL_C = 13,
POS_SMVD = 14,
POS_SMVD = 13,
POS_END = 15
POS_END = 14
};