diff --git a/docs/features.dot b/docs/features.dot index 6e48e6480..88cb3faeb 100644 --- a/docs/features.dot +++ b/docs/features.dot @@ -219,7 +219,7 @@ notes [fontname="Verdana",shape=box,label=<Indic scripts are: Bengali, Devanagari, Gujarati, Gurmukhi, Kannada, Malayalam, Oriya, Tamil, - Telugu, Sinhala + Telugu
@@ -240,7 +240,7 @@ Mongolian, Multani, Nandinagari, Newa, Nko, Nyiakeng Puachue Hmong, Old Sogdian, Pahawh Hmong, Phags Pa, Psalter Pahlavi, Rejang,
-Saurashtra, Sharada, Siddham, Sogdian, Soyombo, Sundanese, +Saurashtra, Sharada, Siddham, Sinhala, Sogdian, Soyombo, Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, Tai Viet,
diff --git a/docs/usermanual-shaping-concepts.xml b/docs/usermanual-shaping-concepts.xml index e1a2eceee..a95b0cbf6 100644 --- a/docs/usermanual-shaping-concepts.xml +++ b/docs/usermanual-shaping-concepts.xml @@ -237,7 +237,7 @@ The Indic shaping model handles the Indic scripts Bengali, Devanagari, Gujarati, Gurmukhi, Kannada, - Malayalam, Oriya, Tamil, Telugu, and Sinhala. + Malayalam, Oriya, Tamil, and Telugu. The Indic shaping model was revised significantly in diff --git a/docs/usermanual-what-is-harfbuzz.xml b/docs/usermanual-what-is-harfbuzz.xml index 4534783c2..fdf04b242 100644 --- a/docs/usermanual-what-is-harfbuzz.xml +++ b/docs/usermanual-what-is-harfbuzz.xml @@ -237,8 +237,7 @@ Indic (covering Devanagari, Bengali, Gujarati, - Gurmukhi, Kannada, Malayalam, Oriya, Tamil, Telugu, and - Sinhala) + Gurmukhi, Kannada, Malayalam, Oriya, Tamil, and Telugu) diff --git a/src/gen-indic-table.py b/src/gen-indic-table.py index f32f97106..a2004e4fc 100755 --- a/src/gen-indic-table.py +++ b/src/gen-indic-table.py @@ -26,7 +26,6 @@ ALLOWED_BLOCKS = [ 'Telugu', 'Kannada', 'Malayalam', - 'Sinhala', 'Myanmar', 'Khmer', 'Vedic Extensions', @@ -236,7 +235,6 @@ category_overrides = { 0x0C30: 'Ra', # Telugu Reph formed only with ZWJ 0x0CB0: 'Ra', # Kannada 0x0D30: 'Ra', # Malayalam No Reph, Logical Repha - 0x0DBB: 'Ra', # Sinhala Reph formed only with ZWJ # The following act more like the Bindus. 0x0953: 'SM', @@ -392,7 +390,6 @@ def matra_pos_right(u, block): if block == 'Telugu': return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB' if block == 'Kannada': return 'BEFORE_SUB' if u < 0x0CC3 or u > 0x0CD6 else 'AFTER_SUB' if block == 'Malayalam': return 'AFTER_POST' - if block == 'Sinhala': return 'AFTER_SUB' return 'AFTER_SUB' def matra_pos_top(u, block): # BENG and MLYM don't have top matras. @@ -403,7 +400,6 @@ def matra_pos_top(u, block): if block == 'Tamil': return 'AFTER_SUB' if block == 'Telugu': return 'BEFORE_SUB' if block == 'Kannada': return 'BEFORE_SUB' - if block == 'Sinhala': return 'AFTER_SUB' return 'AFTER_SUB' def matra_pos_bottom(u, block): if block == 'Devanagari': return 'AFTER_SUB' @@ -415,7 +411,6 @@ def matra_pos_bottom(u, block): if block == 'Telugu': return 'BEFORE_SUB' if block == 'Kannada': return 'BEFORE_SUB' if block == 'Malayalam': return 'AFTER_POST' - if block == 'Sinhala': return 'AFTER_SUB' return "AFTER_SUB" def indic_matra_position(u, pos, block): # Reposition matra if pos == 'PRE_C': return matra_pos_left(u, block) diff --git a/src/hb-ot-shaper-indic-machine.hh b/src/hb-ot-shaper-indic-machine.hh index 9e36bc144..d52b13f61 100644 --- a/src/hb-ot-shaper-indic-machine.hh +++ b/src/hb-ot-shaper-indic-machine.hh @@ -77,279 +77,266 @@ enum indic_syllable_type_t { #line 79 "hb-ot-shaper-indic-machine.hh" static const unsigned char _indic_syllable_machine_trans_keys[] = { - 8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u, - 4u, 12u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, - 4u, 8u, 4u, 12u, 4u, 12u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, - 6u, 6u, 15u, 15u, 4u, 8u, 4u, 8u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u, - 4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u, 4u, 8u, 5u, 8u, 8u, 8u, 1u, 18u, - 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, - 1u, 15u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u, - 3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, - 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, - 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, - 5u, 9u, 3u, 16u, 3u, 16u, 4u, 8u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, - 3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, - 4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, - 3u, 16u, 4u, 12u, 4u, 8u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, - 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, - 5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 1u, 16u, - 3u, 16u, 1u, 16u, 4u, 12u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 3u, 9u, - 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 0 + 8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 4u, 12u, 4u, 8u, 8u, 8u, + 5u, 7u, 5u, 8u, 4u, 8u, 4u, 12u, 4u, 12u, 4u, 12u, 8u, 8u, 5u, 7u, + 5u, 8u, 4u, 8u, 4u, 8u, 4u, 12u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, + 4u, 8u, 5u, 8u, 8u, 8u, 1u, 18u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, + 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 1u, 15u, 3u, 9u, + 4u, 9u, 5u, 9u, 4u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u, + 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, + 5u, 9u, 9u, 9u, 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, + 4u, 9u, 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 3u, 16u, 4u, 8u, + 3u, 16u, 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, + 5u, 9u, 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u, + 5u, 9u, 5u, 9u, 3u, 9u, 5u, 9u, 3u, 16u, 4u, 12u, 4u, 8u, 3u, 16u, + 3u, 16u, 4u, 16u, 1u, 15u, 3u, 16u, 1u, 15u, 5u, 9u, 9u, 9u, 5u, 9u, + 1u, 15u, 1u, 15u, 3u, 9u, 4u, 9u, 5u, 9u, 3u, 16u, 4u, 9u, 5u, 9u, + 5u, 9u, 3u, 9u, 5u, 9u, 1u, 16u, 3u, 16u, 1u, 16u, 4u, 12u, 5u, 9u, + 9u, 9u, 5u, 9u, 1u, 15u, 3u, 9u, 5u, 9u, 5u, 9u, 9u, 9u, 5u, 9u, + 1u, 15u, 0 }; static const char _indic_syllable_machine_key_spans[] = { - 1, 5, 3, 4, 5, 1, 1, 5, - 9, 5, 1, 3, 4, 5, 1, 1, - 5, 9, 9, 9, 1, 3, 4, 5, - 1, 1, 5, 5, 9, 1, 3, 4, - 5, 1, 1, 5, 5, 4, 1, 18, - 14, 14, 13, 15, 5, 5, 1, 5, - 15, 15, 15, 7, 6, 5, 6, 5, - 7, 5, 14, 14, 14, 14, 13, 15, - 14, 14, 13, 15, 5, 1, 5, 15, - 15, 7, 6, 5, 6, 5, 5, 7, - 5, 14, 14, 5, 14, 14, 13, 15, - 14, 15, 5, 1, 5, 15, 15, 7, - 6, 5, 14, 6, 5, 5, 7, 5, - 14, 9, 5, 14, 14, 13, 15, 14, - 15, 5, 1, 5, 15, 15, 7, 6, - 5, 14, 6, 5, 5, 7, 5, 16, - 14, 16, 9, 5, 1, 5, 15, 7, - 5, 5, 1, 5, 15 + 1, 5, 3, 4, 5, 9, 5, 1, + 3, 4, 5, 9, 9, 9, 1, 3, + 4, 5, 5, 9, 1, 3, 4, 5, + 5, 4, 1, 18, 14, 14, 13, 15, + 5, 5, 1, 5, 15, 15, 15, 7, + 6, 5, 6, 5, 7, 5, 14, 14, + 14, 14, 13, 15, 14, 14, 13, 15, + 5, 1, 5, 15, 15, 7, 6, 5, + 6, 5, 5, 7, 5, 14, 14, 5, + 14, 14, 13, 15, 14, 15, 5, 1, + 5, 15, 15, 7, 6, 5, 14, 6, + 5, 5, 7, 5, 14, 9, 5, 14, + 14, 13, 15, 14, 15, 5, 1, 5, + 15, 15, 7, 6, 5, 14, 6, 5, + 5, 7, 5, 16, 14, 16, 9, 5, + 1, 5, 15, 7, 5, 5, 1, 5, + 15 }; static const short _indic_syllable_machine_index_offsets[] = { - 0, 2, 8, 12, 17, 23, 25, 27, - 33, 43, 49, 51, 55, 60, 66, 68, - 70, 76, 86, 96, 106, 108, 112, 117, - 123, 125, 127, 133, 139, 149, 151, 155, - 160, 166, 168, 170, 176, 182, 187, 189, - 208, 223, 238, 252, 268, 274, 280, 282, - 288, 304, 320, 336, 344, 351, 357, 364, - 370, 378, 384, 399, 414, 429, 444, 458, - 474, 489, 504, 518, 534, 540, 542, 548, - 564, 580, 588, 595, 601, 608, 614, 620, - 628, 634, 649, 664, 670, 685, 700, 714, - 730, 745, 761, 767, 769, 775, 791, 807, - 815, 822, 828, 843, 850, 856, 862, 870, - 876, 891, 901, 907, 922, 937, 951, 967, - 982, 998, 1004, 1006, 1012, 1028, 1044, 1052, - 1059, 1065, 1080, 1087, 1093, 1099, 1107, 1113, - 1130, 1145, 1162, 1172, 1178, 1180, 1186, 1202, - 1210, 1216, 1222, 1224, 1230 + 0, 2, 8, 12, 17, 23, 33, 39, + 41, 45, 50, 56, 66, 76, 86, 88, + 92, 97, 103, 109, 119, 121, 125, 130, + 136, 142, 147, 149, 168, 183, 198, 212, + 228, 234, 240, 242, 248, 264, 280, 296, + 304, 311, 317, 324, 330, 338, 344, 359, + 374, 389, 404, 418, 434, 449, 464, 478, + 494, 500, 502, 508, 524, 540, 548, 555, + 561, 568, 574, 580, 588, 594, 609, 624, + 630, 645, 660, 674, 690, 705, 721, 727, + 729, 735, 751, 767, 775, 782, 788, 803, + 810, 816, 822, 830, 836, 851, 861, 867, + 882, 897, 911, 927, 942, 958, 964, 966, + 972, 988, 1004, 1012, 1019, 1025, 1040, 1047, + 1053, 1059, 1067, 1073, 1090, 1105, 1122, 1132, + 1138, 1140, 1146, 1162, 1170, 1176, 1182, 1184, + 1190 }; static const unsigned char _indic_syllable_machine_indicies[] = { 1, 0, 2, 3, 3, 4, 1, 0, 3, 3, 4, 0, 3, 3, 4, 1, - 0, 5, 3, 3, 4, 1, 0, 6, - 0, 7, 0, 8, 3, 3, 4, 1, - 0, 2, 3, 3, 4, 1, 0, 0, - 0, 9, 0, 11, 12, 12, 13, 14, - 10, 14, 10, 12, 12, 13, 10, 12, - 12, 13, 14, 10, 15, 12, 12, 13, - 14, 10, 16, 10, 17, 10, 18, 12, - 12, 13, 14, 10, 11, 12, 12, 13, - 14, 10, 10, 10, 19, 10, 11, 12, - 12, 13, 14, 10, 10, 10, 20, 10, - 22, 23, 23, 24, 25, 21, 21, 21, - 26, 21, 25, 21, 23, 23, 24, 27, - 23, 23, 24, 25, 21, 28, 23, 23, - 24, 25, 21, 29, 21, 30, 21, 22, - 23, 23, 24, 25, 21, 31, 23, 23, - 24, 25, 21, 33, 34, 34, 35, 36, - 32, 32, 32, 37, 32, 36, 32, 34, - 34, 35, 32, 34, 34, 35, 36, 32, - 38, 34, 34, 35, 36, 32, 39, 32, - 40, 32, 33, 34, 34, 35, 36, 32, - 41, 34, 34, 35, 36, 32, 23, 23, - 24, 1, 0, 43, 42, 45, 46, 47, - 48, 49, 50, 24, 25, 51, 52, 52, - 26, 44, 53, 54, 55, 56, 57, 44, - 59, 60, 61, 62, 4, 1, 63, 58, - 58, 9, 58, 58, 58, 64, 58, 65, - 60, 66, 66, 4, 1, 63, 58, 58, - 58, 58, 58, 58, 64, 58, 60, 66, - 66, 4, 1, 63, 58, 58, 58, 58, - 58, 58, 64, 58, 45, 58, 58, 58, - 67, 68, 58, 1, 63, 58, 58, 58, - 58, 58, 45, 58, 69, 69, 58, 1, - 63, 58, 63, 58, 58, 70, 63, 58, - 63, 58, 63, 58, 58, 58, 63, 58, - 45, 58, 71, 58, 69, 69, 58, 1, - 63, 58, 58, 58, 58, 58, 45, 58, - 45, 58, 58, 58, 69, 69, 58, 1, - 63, 58, 58, 58, 58, 58, 45, 58, - 45, 58, 58, 58, 69, 68, 58, 1, - 63, 58, 58, 58, 58, 58, 45, 58, - 72, 7, 73, 74, 4, 1, 63, 58, - 7, 73, 74, 4, 1, 63, 58, 73, - 73, 4, 1, 63, 58, 75, 76, 76, - 4, 1, 63, 58, 67, 77, 58, 1, - 63, 58, 67, 58, 69, 69, 58, 1, - 63, 58, 69, 77, 58, 1, 63, 58, - 59, 60, 66, 66, 4, 1, 63, 58, - 58, 58, 58, 58, 58, 64, 58, 59, - 60, 61, 66, 4, 1, 63, 58, 58, - 9, 58, 58, 58, 64, 58, 79, 80, - 81, 82, 13, 14, 83, 78, 78, 20, - 78, 78, 78, 84, 78, 85, 80, 86, - 82, 13, 14, 83, 78, 78, 78, 78, - 78, 78, 84, 78, 80, 86, 82, 13, - 14, 83, 78, 78, 78, 78, 78, 78, - 84, 78, 87, 78, 78, 78, 88, 89, - 78, 14, 83, 78, 78, 78, 78, 78, - 87, 78, 90, 80, 91, 92, 13, 14, - 83, 78, 78, 19, 78, 78, 78, 84, - 78, 93, 80, 86, 86, 13, 14, 83, - 78, 78, 78, 78, 78, 78, 84, 78, - 80, 86, 86, 13, 14, 83, 78, 78, - 78, 78, 78, 78, 84, 78, 87, 78, - 78, 78, 94, 89, 78, 14, 83, 78, - 78, 78, 78, 78, 87, 78, 83, 78, - 78, 95, 83, 78, 83, 78, 83, 78, - 78, 78, 83, 78, 87, 78, 96, 78, - 94, 94, 78, 14, 83, 78, 78, 78, - 78, 78, 87, 78, 87, 78, 78, 78, - 94, 94, 78, 14, 83, 78, 78, 78, - 78, 78, 87, 78, 97, 17, 98, 99, - 13, 14, 83, 78, 17, 98, 99, 13, - 14, 83, 78, 98, 98, 13, 14, 83, - 78, 100, 101, 101, 13, 14, 83, 78, - 88, 102, 78, 14, 83, 78, 94, 94, - 78, 14, 83, 78, 88, 78, 94, 94, - 78, 14, 83, 78, 94, 102, 78, 14, - 83, 78, 90, 80, 86, 86, 13, 14, - 83, 78, 78, 78, 78, 78, 78, 84, - 78, 90, 80, 91, 86, 13, 14, 83, - 78, 78, 19, 78, 78, 78, 84, 78, - 11, 12, 12, 13, 14, 78, 79, 80, - 86, 82, 13, 14, 83, 78, 78, 78, - 78, 78, 78, 84, 78, 104, 48, 105, - 105, 24, 25, 51, 103, 103, 103, 103, - 103, 103, 55, 103, 48, 105, 105, 24, - 25, 51, 103, 103, 103, 103, 103, 103, - 55, 103, 106, 103, 103, 103, 107, 108, - 103, 25, 51, 103, 103, 103, 103, 103, - 106, 103, 47, 48, 109, 110, 24, 25, - 51, 103, 103, 26, 103, 103, 103, 55, - 103, 106, 103, 103, 103, 111, 108, 103, - 25, 51, 103, 103, 103, 103, 103, 106, - 103, 51, 103, 103, 112, 51, 103, 51, - 103, 51, 103, 103, 103, 51, 103, 106, - 103, 113, 103, 111, 111, 103, 25, 51, - 103, 103, 103, 103, 103, 106, 103, 106, - 103, 103, 103, 111, 111, 103, 25, 51, - 103, 103, 103, 103, 103, 106, 103, 114, - 30, 115, 116, 24, 25, 51, 103, 30, - 115, 116, 24, 25, 51, 103, 115, 115, - 24, 25, 51, 103, 47, 48, 105, 105, - 24, 25, 51, 103, 103, 103, 103, 103, - 103, 55, 103, 117, 118, 118, 24, 25, - 51, 103, 107, 119, 103, 25, 51, 103, - 111, 111, 103, 25, 51, 103, 107, 103, - 111, 111, 103, 25, 51, 103, 111, 119, - 103, 25, 51, 103, 47, 48, 109, 105, - 24, 25, 51, 103, 103, 26, 103, 103, - 103, 55, 103, 22, 23, 23, 24, 25, - 120, 120, 120, 26, 120, 22, 23, 23, - 24, 25, 120, 122, 123, 124, 125, 35, - 36, 126, 121, 121, 37, 121, 121, 121, - 127, 121, 128, 123, 125, 125, 35, 36, - 126, 121, 121, 121, 121, 121, 121, 127, - 121, 123, 125, 125, 35, 36, 126, 121, - 121, 121, 121, 121, 121, 127, 121, 129, - 121, 121, 121, 130, 131, 121, 36, 126, - 121, 121, 121, 121, 121, 129, 121, 122, - 123, 124, 52, 35, 36, 126, 121, 121, - 37, 121, 121, 121, 127, 121, 129, 121, - 121, 121, 132, 131, 121, 36, 126, 121, - 121, 121, 121, 121, 129, 121, 126, 121, - 121, 133, 126, 121, 126, 121, 126, 121, - 121, 121, 126, 121, 129, 121, 134, 121, - 132, 132, 121, 36, 126, 121, 121, 121, - 121, 121, 129, 121, 129, 121, 121, 121, - 132, 132, 121, 36, 126, 121, 121, 121, - 121, 121, 129, 121, 135, 40, 136, 137, - 35, 36, 126, 121, 40, 136, 137, 35, - 36, 126, 121, 136, 136, 35, 36, 126, - 121, 122, 123, 125, 125, 35, 36, 126, - 121, 121, 121, 121, 121, 121, 127, 121, - 138, 139, 139, 35, 36, 126, 121, 130, - 140, 121, 36, 126, 121, 132, 132, 121, - 36, 126, 121, 130, 121, 132, 132, 121, - 36, 126, 121, 132, 140, 121, 36, 126, - 121, 45, 46, 47, 48, 109, 105, 24, - 25, 51, 52, 52, 26, 103, 103, 45, - 55, 103, 59, 141, 61, 62, 4, 1, - 63, 58, 58, 9, 58, 58, 58, 64, - 58, 45, 46, 47, 48, 142, 143, 24, - 144, 145, 58, 52, 26, 58, 58, 45, - 55, 58, 22, 146, 146, 24, 144, 63, - 58, 58, 26, 58, 145, 58, 58, 147, - 145, 58, 145, 58, 145, 58, 58, 58, - 145, 58, 45, 58, 71, 22, 146, 146, - 24, 144, 63, 58, 58, 58, 58, 58, - 45, 58, 149, 148, 150, 150, 148, 43, - 151, 148, 150, 150, 148, 43, 151, 148, - 151, 148, 148, 152, 151, 148, 151, 148, - 151, 148, 148, 148, 151, 148, 45, 120, - 120, 120, 120, 120, 120, 120, 120, 52, - 120, 120, 120, 120, 45, 120, 0 + 0, 5, 3, 3, 4, 1, 0, 2, + 3, 3, 4, 1, 0, 0, 0, 6, + 0, 8, 9, 9, 10, 11, 7, 11, + 7, 9, 9, 10, 7, 9, 9, 10, + 11, 7, 12, 9, 9, 10, 11, 7, + 8, 9, 9, 10, 11, 7, 7, 7, + 13, 7, 8, 9, 9, 10, 11, 7, + 7, 7, 14, 7, 16, 17, 17, 18, + 19, 15, 15, 15, 20, 15, 19, 15, + 17, 17, 18, 21, 17, 17, 18, 19, + 15, 16, 17, 17, 18, 19, 15, 22, + 17, 17, 18, 19, 15, 24, 25, 25, + 26, 27, 23, 23, 23, 28, 23, 27, + 23, 25, 25, 26, 23, 25, 25, 26, + 27, 23, 24, 25, 25, 26, 27, 23, + 29, 25, 25, 26, 27, 23, 17, 17, + 18, 1, 0, 31, 30, 33, 34, 35, + 36, 37, 38, 18, 19, 39, 40, 40, + 20, 32, 41, 42, 43, 44, 45, 32, + 47, 48, 49, 50, 4, 1, 51, 46, + 46, 6, 46, 46, 46, 52, 46, 53, + 48, 54, 54, 4, 1, 51, 46, 46, + 46, 46, 46, 46, 52, 46, 48, 54, + 54, 4, 1, 51, 46, 46, 46, 46, + 46, 46, 52, 46, 33, 46, 46, 46, + 55, 56, 46, 1, 51, 46, 46, 46, + 46, 46, 33, 46, 57, 57, 46, 1, + 51, 46, 51, 46, 46, 58, 51, 46, + 51, 46, 51, 46, 46, 46, 51, 46, + 33, 46, 59, 46, 57, 57, 46, 1, + 51, 46, 46, 46, 46, 46, 33, 46, + 33, 46, 46, 46, 57, 57, 46, 1, + 51, 46, 46, 46, 46, 46, 33, 46, + 33, 46, 46, 46, 57, 56, 46, 1, + 51, 46, 46, 46, 46, 46, 33, 46, + 60, 61, 62, 62, 4, 1, 51, 46, + 61, 62, 62, 4, 1, 51, 46, 62, + 62, 4, 1, 51, 46, 63, 64, 64, + 4, 1, 51, 46, 55, 65, 46, 1, + 51, 46, 55, 46, 57, 57, 46, 1, + 51, 46, 57, 65, 46, 1, 51, 46, + 47, 48, 54, 54, 4, 1, 51, 46, + 46, 46, 46, 46, 46, 52, 46, 47, + 48, 49, 54, 4, 1, 51, 46, 46, + 6, 46, 46, 46, 52, 46, 67, 68, + 69, 70, 10, 11, 71, 66, 66, 14, + 66, 66, 66, 72, 66, 73, 68, 74, + 70, 10, 11, 71, 66, 66, 66, 66, + 66, 66, 72, 66, 68, 74, 70, 10, + 11, 71, 66, 66, 66, 66, 66, 66, + 72, 66, 75, 66, 66, 66, 76, 77, + 66, 11, 71, 66, 66, 66, 66, 66, + 75, 66, 78, 68, 79, 80, 10, 11, + 71, 66, 66, 13, 66, 66, 66, 72, + 66, 81, 68, 74, 74, 10, 11, 71, + 66, 66, 66, 66, 66, 66, 72, 66, + 68, 74, 74, 10, 11, 71, 66, 66, + 66, 66, 66, 66, 72, 66, 75, 66, + 66, 66, 82, 77, 66, 11, 71, 66, + 66, 66, 66, 66, 75, 66, 71, 66, + 66, 83, 71, 66, 71, 66, 71, 66, + 66, 66, 71, 66, 75, 66, 84, 66, + 82, 82, 66, 11, 71, 66, 66, 66, + 66, 66, 75, 66, 75, 66, 66, 66, + 82, 82, 66, 11, 71, 66, 66, 66, + 66, 66, 75, 66, 85, 86, 87, 87, + 10, 11, 71, 66, 86, 87, 87, 10, + 11, 71, 66, 87, 87, 10, 11, 71, + 66, 88, 89, 89, 10, 11, 71, 66, + 76, 90, 66, 11, 71, 66, 82, 82, + 66, 11, 71, 66, 76, 66, 82, 82, + 66, 11, 71, 66, 82, 90, 66, 11, + 71, 66, 78, 68, 74, 74, 10, 11, + 71, 66, 66, 66, 66, 66, 66, 72, + 66, 78, 68, 79, 74, 10, 11, 71, + 66, 66, 13, 66, 66, 66, 72, 66, + 8, 9, 9, 10, 11, 66, 67, 68, + 74, 70, 10, 11, 71, 66, 66, 66, + 66, 66, 66, 72, 66, 92, 36, 93, + 93, 18, 19, 39, 91, 91, 91, 91, + 91, 91, 43, 91, 36, 93, 93, 18, + 19, 39, 91, 91, 91, 91, 91, 91, + 43, 91, 94, 91, 91, 91, 95, 96, + 91, 19, 39, 91, 91, 91, 91, 91, + 94, 91, 35, 36, 97, 98, 18, 19, + 39, 91, 91, 20, 91, 91, 91, 43, + 91, 94, 91, 91, 91, 99, 96, 91, + 19, 39, 91, 91, 91, 91, 91, 94, + 91, 39, 91, 91, 100, 39, 91, 39, + 91, 39, 91, 91, 91, 39, 91, 94, + 91, 101, 91, 99, 99, 91, 19, 39, + 91, 91, 91, 91, 91, 94, 91, 94, + 91, 91, 91, 99, 99, 91, 19, 39, + 91, 91, 91, 91, 91, 94, 91, 102, + 103, 104, 104, 18, 19, 39, 91, 103, + 104, 104, 18, 19, 39, 91, 104, 104, + 18, 19, 39, 91, 35, 36, 93, 93, + 18, 19, 39, 91, 91, 91, 91, 91, + 91, 43, 91, 105, 106, 106, 18, 19, + 39, 91, 95, 107, 91, 19, 39, 91, + 99, 99, 91, 19, 39, 91, 95, 91, + 99, 99, 91, 19, 39, 91, 99, 107, + 91, 19, 39, 91, 35, 36, 97, 93, + 18, 19, 39, 91, 91, 20, 91, 91, + 91, 43, 91, 16, 17, 17, 18, 19, + 108, 108, 108, 20, 108, 16, 17, 17, + 18, 19, 108, 110, 111, 112, 113, 26, + 27, 114, 109, 109, 28, 109, 109, 109, + 115, 109, 116, 111, 113, 113, 26, 27, + 114, 109, 109, 109, 109, 109, 109, 115, + 109, 111, 113, 113, 26, 27, 114, 109, + 109, 109, 109, 109, 109, 115, 109, 117, + 109, 109, 109, 118, 119, 109, 27, 114, + 109, 109, 109, 109, 109, 117, 109, 110, + 111, 112, 40, 26, 27, 114, 109, 109, + 28, 109, 109, 109, 115, 109, 117, 109, + 109, 109, 120, 119, 109, 27, 114, 109, + 109, 109, 109, 109, 117, 109, 114, 109, + 109, 121, 114, 109, 114, 109, 114, 109, + 109, 109, 114, 109, 117, 109, 122, 109, + 120, 120, 109, 27, 114, 109, 109, 109, + 109, 109, 117, 109, 117, 109, 109, 109, + 120, 120, 109, 27, 114, 109, 109, 109, + 109, 109, 117, 109, 123, 124, 125, 125, + 26, 27, 114, 109, 124, 125, 125, 26, + 27, 114, 109, 125, 125, 26, 27, 114, + 109, 110, 111, 113, 113, 26, 27, 114, + 109, 109, 109, 109, 109, 109, 115, 109, + 126, 127, 127, 26, 27, 114, 109, 118, + 128, 109, 27, 114, 109, 120, 120, 109, + 27, 114, 109, 118, 109, 120, 120, 109, + 27, 114, 109, 120, 128, 109, 27, 114, + 109, 33, 34, 35, 36, 97, 93, 18, + 19, 39, 40, 40, 20, 91, 91, 33, + 43, 91, 47, 129, 49, 50, 4, 1, + 51, 46, 46, 6, 46, 46, 46, 52, + 46, 33, 34, 35, 36, 130, 131, 18, + 132, 133, 46, 40, 20, 46, 46, 33, + 43, 46, 16, 134, 134, 18, 132, 51, + 46, 46, 20, 46, 133, 46, 46, 135, + 133, 46, 133, 46, 133, 46, 46, 46, + 133, 46, 33, 46, 59, 16, 134, 134, + 18, 132, 51, 46, 46, 46, 46, 46, + 33, 46, 137, 136, 138, 138, 136, 31, + 139, 136, 138, 138, 136, 31, 139, 136, + 139, 136, 136, 140, 139, 136, 139, 136, + 139, 136, 136, 136, 139, 136, 33, 108, + 108, 108, 108, 108, 108, 108, 108, 40, + 108, 108, 108, 108, 33, 108, 0 }; static const unsigned char _indic_syllable_machine_trans_targs[] = { - 39, 45, 50, 2, 51, 5, 6, 53, - 57, 58, 39, 67, 11, 73, 68, 14, - 15, 75, 80, 81, 84, 39, 89, 21, - 95, 90, 98, 39, 24, 25, 97, 103, - 39, 112, 30, 118, 113, 121, 33, 34, - 120, 126, 39, 137, 39, 40, 60, 85, - 87, 105, 106, 91, 107, 127, 128, 99, - 135, 140, 39, 41, 43, 8, 59, 46, - 54, 42, 1, 44, 48, 0, 47, 49, - 52, 3, 4, 55, 7, 56, 39, 61, - 63, 18, 83, 69, 76, 62, 9, 64, - 78, 71, 65, 17, 82, 66, 10, 70, - 72, 74, 12, 13, 77, 16, 79, 39, - 86, 26, 88, 101, 93, 19, 104, 20, - 92, 94, 96, 22, 23, 100, 27, 102, - 39, 39, 108, 110, 28, 35, 114, 122, - 109, 111, 124, 116, 29, 115, 117, 119, - 31, 32, 123, 36, 125, 129, 130, 134, - 131, 132, 37, 133, 39, 136, 38, 138, - 139 + 27, 33, 38, 2, 39, 45, 46, 27, + 55, 8, 61, 56, 68, 69, 72, 27, + 77, 15, 83, 78, 86, 27, 91, 27, + 100, 21, 106, 101, 109, 114, 27, 125, + 27, 28, 48, 73, 75, 93, 94, 79, + 95, 115, 116, 87, 123, 128, 27, 29, + 31, 5, 47, 34, 42, 30, 1, 32, + 36, 0, 35, 37, 40, 41, 3, 43, + 4, 44, 27, 49, 51, 12, 71, 57, + 64, 50, 6, 52, 66, 59, 53, 11, + 70, 54, 7, 58, 60, 62, 63, 9, + 65, 10, 67, 27, 74, 17, 76, 89, + 81, 13, 92, 14, 80, 82, 84, 85, + 16, 88, 18, 90, 27, 27, 96, 98, + 19, 23, 102, 110, 97, 99, 112, 104, + 20, 103, 105, 107, 108, 22, 111, 24, + 113, 117, 118, 122, 119, 120, 25, 121, + 27, 124, 26, 126, 127 }; static const char _indic_syllable_machine_trans_actions[] = { - 1, 0, 2, 0, 2, 0, 0, 2, - 2, 2, 3, 2, 0, 2, 0, 0, - 0, 2, 2, 2, 2, 4, 2, 0, - 5, 0, 5, 6, 0, 0, 5, 2, - 7, 2, 0, 2, 0, 2, 0, 0, - 2, 2, 8, 0, 11, 2, 2, 5, - 0, 12, 12, 0, 2, 5, 2, 5, - 2, 0, 13, 2, 0, 0, 2, 0, - 2, 2, 0, 2, 2, 0, 0, 2, - 2, 0, 0, 0, 0, 2, 14, 2, + 1, 0, 2, 0, 2, 2, 2, 3, + 2, 0, 2, 0, 2, 2, 2, 4, + 2, 0, 5, 0, 5, 6, 2, 7, + 2, 0, 2, 0, 2, 2, 8, 0, + 11, 2, 2, 5, 0, 12, 12, 0, + 2, 5, 2, 5, 2, 0, 13, 2, 0, 0, 2, 0, 2, 2, 0, 2, - 2, 2, 2, 0, 2, 2, 0, 0, - 2, 2, 0, 0, 0, 0, 2, 15, - 5, 0, 5, 2, 2, 0, 5, 0, - 0, 2, 5, 0, 0, 0, 0, 2, - 16, 17, 2, 0, 0, 0, 0, 2, - 2, 2, 2, 2, 0, 0, 2, 2, - 0, 0, 0, 0, 2, 0, 18, 18, - 0, 0, 0, 0, 19, 2, 0, 0, - 0 + 2, 0, 0, 2, 2, 2, 0, 0, + 0, 2, 14, 2, 0, 0, 2, 0, + 2, 2, 0, 2, 2, 2, 2, 0, + 2, 2, 0, 0, 2, 2, 2, 0, + 0, 0, 2, 15, 5, 0, 5, 2, + 2, 0, 5, 0, 0, 2, 5, 5, + 0, 0, 0, 2, 16, 17, 2, 0, + 0, 0, 0, 2, 2, 2, 2, 2, + 0, 0, 2, 2, 2, 0, 0, 0, + 2, 0, 18, 18, 0, 0, 0, 0, + 19, 2, 0, 0, 0 }; static const char _indic_syllable_machine_to_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 9, + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -362,15 +349,14 @@ static const char _indic_syllable_machine_to_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0 }; static const char _indic_syllable_machine_from_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 10, + 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -383,42 +369,41 @@ static const char _indic_syllable_machine_from_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 + 0 }; static const short _indic_syllable_machine_eof_trans[] = { - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 22, 22, 28, 22, 22, - 22, 22, 22, 22, 33, 33, 33, 33, - 33, 33, 33, 33, 33, 1, 43, 0, - 59, 59, 59, 59, 59, 59, 59, 59, - 59, 59, 59, 59, 59, 59, 59, 59, - 59, 59, 59, 59, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 104, 104, 104, - 104, 104, 104, 104, 104, 104, 104, 104, - 104, 104, 104, 104, 104, 104, 104, 104, - 104, 121, 121, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 104, - 59, 59, 59, 59, 59, 59, 59, 149, - 149, 149, 149, 149, 121 + 1, 1, 1, 1, 1, 1, 8, 8, + 8, 8, 8, 8, 8, 16, 16, 22, + 16, 16, 16, 24, 24, 24, 24, 24, + 24, 1, 31, 0, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 67, 92, 92, 92, 92, 92, 92, 92, + 92, 92, 92, 92, 92, 92, 92, 92, + 92, 92, 92, 92, 92, 109, 109, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 110, 110, 110, 110, 110, + 110, 110, 110, 92, 47, 47, 47, 47, + 47, 47, 47, 137, 137, 137, 137, 137, + 109 }; -static const int indic_syllable_machine_start = 39; -static const int indic_syllable_machine_first_final = 39; +static const int indic_syllable_machine_start = 27; +static const int indic_syllable_machine_first_final = 27; static const int indic_syllable_machine_error = -1; -static const int indic_syllable_machine_en_main = 39; +static const int indic_syllable_machine_en_main = 27; #line 58 "hb-ot-shaper-indic-machine.rl" -#line 118 "hb-ot-shaper-indic-machine.rl" +#line 117 "hb-ot-shaper-indic-machine.rl" #define found_syllable(syllable_type) \ @@ -437,7 +422,7 @@ find_syllables_indic (hb_buffer_t *buffer) int cs; hb_glyph_info_t *info = buffer->info; -#line 441 "hb-ot-shaper-indic-machine.hh" +#line 426 "hb-ot-shaper-indic-machine.hh" { cs = indic_syllable_machine_start; ts = 0; @@ -445,7 +430,7 @@ find_syllables_indic (hb_buffer_t *buffer) act = 0; } -#line 138 "hb-ot-shaper-indic-machine.rl" +#line 137 "hb-ot-shaper-indic-machine.rl" p = 0; @@ -453,7 +438,7 @@ find_syllables_indic (hb_buffer_t *buffer) unsigned int syllable_serial = 1; -#line 457 "hb-ot-shaper-indic-machine.hh" +#line 442 "hb-ot-shaper-indic-machine.hh" { int _slen; int _trans; @@ -467,7 +452,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 471 "hb-ot-shaper-indic-machine.hh" +#line 456 "hb-ot-shaper-indic-machine.hh" } _keys = _indic_syllable_machine_trans_keys + (cs<<1); @@ -490,51 +475,51 @@ _eof_trans: {te = p+1;} break; case 11: -#line 114 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {te = p+1;{ found_syllable (indic_non_indic_cluster); }} break; case 13: -#line 109 "hb-ot-shaper-indic-machine.rl" +#line 108 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_consonant_syllable); }} break; case 14: -#line 110 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_vowel_syllable); }} break; case 17: -#line 111 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_standalone_cluster); }} break; case 19: -#line 112 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_symbol_cluster); }} break; case 15: -#line 113 "hb-ot-shaper-indic-machine.rl" +#line 112 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 16: -#line 114 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_non_indic_cluster); }} break; case 1: -#line 109 "hb-ot-shaper-indic-machine.rl" +#line 108 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }} break; case 3: -#line 110 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }} break; case 7: -#line 111 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }} break; case 8: -#line 112 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }} break; case 4: -#line 113 "hb-ot-shaper-indic-machine.rl" +#line 112 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 6: @@ -555,22 +540,22 @@ _eof_trans: case 18: #line 1 "NONE" {te = p+1;} -#line 109 "hb-ot-shaper-indic-machine.rl" +#line 108 "hb-ot-shaper-indic-machine.rl" {act = 1;} break; case 5: #line 1 "NONE" {te = p+1;} -#line 113 "hb-ot-shaper-indic-machine.rl" +#line 112 "hb-ot-shaper-indic-machine.rl" {act = 5;} break; case 12: #line 1 "NONE" {te = p+1;} -#line 114 "hb-ot-shaper-indic-machine.rl" +#line 113 "hb-ot-shaper-indic-machine.rl" {act = 6;} break; -#line 574 "hb-ot-shaper-indic-machine.hh" +#line 559 "hb-ot-shaper-indic-machine.hh" } _again: @@ -579,7 +564,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 583 "hb-ot-shaper-indic-machine.hh" +#line 568 "hb-ot-shaper-indic-machine.hh" } if ( ++p != pe ) @@ -595,7 +580,7 @@ _again: } -#line 146 "hb-ot-shaper-indic-machine.rl" +#line 145 "hb-ot-shaper-indic-machine.rl" } diff --git a/src/hb-ot-shaper-indic-machine.rl b/src/hb-ot-shaper-indic-machine.rl index 5c9bb0938..3274a776a 100644 --- a/src/hb-ot-shaper-indic-machine.rl +++ b/src/hb-ot-shaper-indic-machine.rl @@ -87,9 +87,8 @@ z = ZWJ|ZWNJ; # is_joiner reph = (Ra H | Repha); # possible reph cn = c.ZWJ?.n?; -forced_rakar = ZWJ H ZWJ Ra; symbol = Symbol.N?; -matra_group = z*.M.N?.(H | forced_rakar)?; +matra_group = z*.M.N?.H?; syllable_tail = (z?.SM.SM?.ZWNJ?)? (A | VD)*; halant_group = (z?.H.(ZWJ.N?)?); final_halant_group = halant_group | H.ZWNJ; diff --git a/src/hb-ot-shaper-indic-table.cc b/src/hb-ot-shaper-indic-table.cc index 6734e7f89..3d3698d73 100644 --- a/src/hb-ot-shaper-indic-table.cc +++ b/src/hb-ot-shaper-indic-table.cc @@ -75,26 +75,26 @@ static_assert (OT_VPst == M_Cat(VPst), ""); #define _OT_A OT_A /* 53 chars; A */ #define _OT_As OT_As /* 1 chars; As */ -#define _OT_C OT_C /* 518 chars; C */ +#define _OT_C OT_C /* 478 chars; C */ #define _OT_CM OT_CM /* 1 chars; CM */ #define _OT_CS OT_CS /* 2 chars; CS */ #define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */ -#define _OT_H OT_H /* 12 chars; H */ -#define _OT_M OT_M /* 160 chars; M */ +#define _OT_H OT_H /* 11 chars; H */ +#define _OT_M OT_M /* 143 chars; M */ #define _OT_MH OT_MH /* 1 chars; MH */ #define _OT_ML OT_ML /* 1 chars; ML */ #define _OT_MR OT_MR /* 1 chars; MR */ #define _OT_MW OT_MW /* 2 chars; MW */ #define _OT_MY OT_MY /* 3 chars; MY */ #define _OT_N OT_N /* 17 chars; N */ -#define _OT_GB OT_PLACEHOLDER /* 175 chars; PLACEHOLDER */ +#define _OT_GB OT_PLACEHOLDER /* 165 chars; PLACEHOLDER */ #define _OT_PT OT_PT /* 8 chars; PT */ -#define _OT_R OT_Ra /* 15 chars; Ra */ +#define _OT_R OT_Ra /* 14 chars; Ra */ #define _OT_Rf OT_Repha /* 1 chars; Repha */ #define _OT_Rt OT_Robatic /* 3 chars; Robatic */ -#define _OT_SM OT_SM /* 58 chars; SM */ +#define _OT_SM OT_SM /* 55 chars; SM */ #define _OT_S OT_Symbol /* 22 chars; Symbol */ -#define _OT_V OT_V /* 190 chars; V */ +#define _OT_V OT_V /* 172 chars; V */ #define _OT_VA OT_VAbv /* 18 chars; VAbv */ #define _OT_VB OT_VBlw /* 7 chars; VBlw */ #define _OT_VL OT_VPre /* 5 chars; VPre */ @@ -106,18 +106,18 @@ static_assert (OT_VPst == M_Cat(VPst), ""); #define _OT_ZWJ OT_ZWJ /* 1 chars; ZWJ */ #define _OT_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */ -#define _POS_T POS_ABOVE_C /* 23 chars; ABOVE_C */ +#define _POS_T POS_ABOVE_C /* 22 chars; ABOVE_C */ #define _POS_A POS_AFTER_MAIN /* 3 chars; AFTER_MAIN */ #define _POS_AP POS_AFTER_POST /* 50 chars; AFTER_POST */ -#define _POS_AS POS_AFTER_SUB /* 60 chars; AFTER_SUB */ -#define _POS_C POS_BASE_C /* 902 chars; BASE_C */ +#define _POS_AS POS_AFTER_SUB /* 45 chars; AFTER_SUB */ +#define _POS_C POS_BASE_C /* 833 chars; BASE_C */ #define _POS_BS POS_BEFORE_SUB /* 31 chars; BEFORE_SUB */ #define _POS_B POS_BELOW_C /* 13 chars; BELOW_C */ #define _POS_X POS_END /* 71 chars; END */ #define _POS_R POS_POST_C /* 13 chars; POST_C */ #define _POS_L POS_PRE_C /* 5 chars; PRE_C */ -#define _POS_LM POS_PRE_M /* 16 chars; PRE_M */ -#define _POS_SM POS_SMVD /* 132 chars; SMVD */ +#define _POS_LM POS_PRE_M /* 14 chars; PRE_M */ +#define _POS_SM POS_SMVD /* 129 chars; SMVD */ #pragma GCC diagnostic pop @@ -323,25 +323,7 @@ static const uint16_t indic_table[] = { /* 0D70 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 0D78 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), - /* Sinhala */ - - /* 0D80 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), - /* 0D88 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), - /* 0D90 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), - /* 0D98 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), - /* 0DA0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), - /* 0DA8 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), - /* 0DB0 */ _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), - /* 0DB8 */ _(C,C), _(C,C), _(C,C), _(R,C), _(X,X), _(C,C), _(X,X), _(X,X), - /* 0DC0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X), - /* 0DC8 */ _(X,X), _(X,X), _(H,T), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AS), - /* 0DD0 */ _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(M,AS), _(X,X), - /* 0DD8 */ _(M,AS), _(M,LM), _(M,AS), _(M,LM), _(M,AS), _(M,AS), _(M,AS), _(M,AS), - /* 0DE0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), - /* 0DE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), - /* 0DF0 */ _(X,X), _(X,X), _(M,AS), _(M,AS), _(X,X), _(X,X), _(X,X), _(X,X), - -#define indic_offset_0x1000u 1336 +#define indic_offset_0x1000u 1216 /* Myanmar */ @@ -367,7 +349,7 @@ static const uint16_t indic_table[] = { /* 1090 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 1098 */ _(GB,C), _(GB,C),_(SM,SM),_(SM,SM),_(SM,SM), _(VA,T), _(X,X), _(X,X), -#define indic_offset_0x1780u 1496 +#define indic_offset_0x1780u 1376 /* Khmer */ @@ -387,7 +369,7 @@ static const uint16_t indic_table[] = { /* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), -#define indic_offset_0x1cd0u 1608 +#define indic_offset_0x1cd0u 1488 /* Vedic Extensions */ @@ -399,7 +381,7 @@ static const uint16_t indic_table[] = { /* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM), /* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), -#define indic_offset_0x2008u 1656 +#define indic_offset_0x2008u 1536 /* General Punctuation */ @@ -409,7 +391,7 @@ static const uint16_t indic_table[] = { /* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), -#define indic_offset_0x2070u 1688 +#define indic_offset_0x2070u 1568 /* Superscripts and Subscripts */ @@ -418,14 +400,14 @@ static const uint16_t indic_table[] = { /* 2078 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 2080 */ _(X,X), _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), -#define indic_offset_0x25f8u 1712 +#define indic_offset_0x25f8u 1592 /* Geometric Shapes */ /* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), -#define indic_offset_0xa8e0u 1720 +#define indic_offset_0xa8e0u 1600 /* Devanagari Extended */ @@ -435,7 +417,7 @@ static const uint16_t indic_table[] = { /* A8F0 */ _(A,SM), _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), /* A8F8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(V,C), _(M,AS), -#define indic_offset_0xa9e0u 1752 +#define indic_offset_0xa9e0u 1632 /* Myanmar Extended-B */ @@ -445,7 +427,7 @@ static const uint16_t indic_table[] = { /* A9F0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), /* A9F8 */ _(GB,C), _(GB,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X), -#define indic_offset_0xaa60u 1784 +#define indic_offset_0xaa60u 1664 /* Myanmar Extended-A */ @@ -455,7 +437,7 @@ static const uint16_t indic_table[] = { /* AA70 */ _(X,X), _(C,C), _(C,C), _(C,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), /* AA78 */ _(X,X), _(X,X), _(C,C), _(PT,X), _(N,X), _(N,X), _(C,C), _(C,C), -#define indic_offset_0xfe00u 1816 +#define indic_offset_0xfe00u 1696 /* Variation Selectors */ @@ -463,7 +445,7 @@ static const uint16_t indic_table[] = { /* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), /* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), -#define indic_offset_0x11300u 1832 +#define indic_offset_0x11300u 1712 /* Grantha */ @@ -477,7 +459,7 @@ static const uint16_t indic_table[] = { /* 11330 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 11338 */ _(X,X), _(X,X), _(X,X), _(N,X), _(N,X), _(X,X), _(X,X), _(X,X), -}; /* Table items: 1896; occupancy: 69% */ +}; /* Table items: 1776; occupancy: 69% */ uint16_t hb_indic_get_categories (hb_codepoint_t u) @@ -488,7 +470,7 @@ hb_indic_get_categories (hb_codepoint_t u) if (unlikely (u == 0x00A0u)) return _(GB,C); if (hb_in_range (u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u]; if (hb_in_range (u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u]; - if (hb_in_range (u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u]; + if (hb_in_range (u, 0x0900u, 0x0D7Fu)) return indic_table[u - 0x0900u + indic_offset_0x0900u]; break; case 0x1u: diff --git a/src/hb-ot-shaper-indic.cc b/src/hb-ot-shaper-indic.cc index 00e0dd919..48a3c7446 100644 --- a/src/hb-ot-shaper-indic.cc +++ b/src/hb-ot-shaper-indic.cc @@ -122,10 +122,6 @@ struct hb_indic_would_substitute_feature_t * instead of adding a new flag in these structs. */ -enum base_position_t { - BASE_POS_LAST_SINHALA, - BASE_POS_LAST -}; enum reph_position_t { REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, @@ -147,7 +143,6 @@ struct indic_config_t hb_script_t script; bool has_old_spec; hb_codepoint_t virama; - base_position_t base_pos; reph_position_t reph_pos; reph_mode_t reph_mode; blwf_mode_t blwf_mode; @@ -156,18 +151,16 @@ struct indic_config_t static const indic_config_t indic_configs[] = { /* Default. Should be first. */ - {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY}, - {HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY}, - {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST}, - {HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA, - REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_INVALID, false, 0,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_DEVANAGARI,true, 0x094Du,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_BENGALI, true, 0x09CDu,REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_GUJARATI, true, 0x0ACDu,REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_ORIYA, true, 0x0B4Du,REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_TAMIL, true, 0x0BCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_TELUGU, true, 0x0C4Du,REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY}, + {HB_SCRIPT_KANNADA, true, 0x0CCDu,REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY}, + {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST}, }; @@ -451,9 +444,6 @@ update_consonant_positions_indic (const hb_ot_shape_plan_t *plan, { const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; - if (indic_plan->config->base_pos != BASE_POS_LAST) - return; - hb_codepoint_t virama; if (indic_plan->load_virama_glyph (font, &virama)) { @@ -551,84 +541,51 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, has_reph = true; } - switch (indic_plan->config->base_pos) { - case BASE_POS_LAST: - { - /* -> starting from the end of the syllable, move backwards */ - unsigned int i = end; - bool seen_below = false; - do { - i--; - /* -> until a consonant is found */ - if (is_consonant (info[i])) + /* -> starting from the end of the syllable, move backwards */ + unsigned int i = end; + bool seen_below = false; + do { + i--; + /* -> until a consonant is found */ + if (is_consonant (info[i])) + { + /* -> that does not have a below-base or post-base form + * (post-base forms have to follow below-base forms), */ + if (info[i].indic_position() != POS_BELOW_C && + (info[i].indic_position() != POS_POST_C || seen_below)) { - /* -> that does not have a below-base or post-base form - * (post-base forms have to follow below-base forms), */ - if (info[i].indic_position() != POS_BELOW_C && - (info[i].indic_position() != POS_POST_C || seen_below)) - { - base = i; - break; - } - if (info[i].indic_position() == POS_BELOW_C) - seen_below = true; - - /* -> or that is not a pre-base-reordering Ra, - * - * IMPLEMENTATION NOTES: - * - * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped - * by the logic above already. - */ - - /* -> or arrive at the first consonant. The consonant stopped at will - * be the base. */ base = i; + break; } - else - { - /* A ZWJ after a Halant stops the base search, and requests an explicit - * half form. - * A ZWJ before a Halant, requests a subjoined form instead, and hence - * search continues. This is particularly important for Bengali - * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ - if (start < i && - info[i].indic_category() == I_Cat(ZWJ) && - info[i - 1].indic_category() == I_Cat(H)) - break; - } - } while (i > limit); - } - break; + if (info[i].indic_position() == POS_BELOW_C) + seen_below = true; - case BASE_POS_LAST_SINHALA: - { - /* Sinhala base positioning is slightly different from main Indic, in that: - * 1. Its ZWJ behavior is different, - * 2. We don't need to look into the font for consonant positions. - */ + /* -> or that is not a pre-base-reordering Ra, + * + * IMPLEMENTATION NOTES: + * + * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped + * by the logic above already. + */ - if (!has_reph) - base = limit; - - /* Find the last base consonant that is not blocked by ZWJ. If there is - * a ZWJ right before a base consonant, that would request a subjoined form. */ - for (unsigned int i = limit; i < end; i++) - if (is_consonant (info[i])) - { - if (limit < i && info[i - 1].indic_category() == I_Cat(ZWJ)) - break; - else - base = i; - } - - /* Mark all subsequent consonants as below. */ - for (unsigned int i = base + 1; i < end; i++) - if (is_consonant (info[i])) - info[i].indic_position() = POS_BELOW_C; - } - break; + /* -> or arrive at the first consonant. The consonant stopped at will + * be the base. */ + base = i; + } + else + { + /* A ZWJ after a Halant stops the base search, and requests an explicit + * half form. + * A ZWJ before a Halant, requests a subjoined form instead, and hence + * search continues. This is particularly important for Bengali + * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ + if (start < i && + info[i].indic_category() == I_Cat(ZWJ) && + info[i - 1].indic_category() == I_Cat(H)) + break; + } + } while (i > limit); } /* -> If the syllable starts with Ra + Halant (in a script that has Reph) @@ -683,18 +640,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, if (base < end) info[base].indic_position() = POS_BASE_C; - /* Mark final consonants. A final consonant is one appearing after a matra. - * Happens in Sinhala. */ - for (unsigned int i = base + 1; i < end; i++) - if (info[i].indic_category() == I_Cat(M)) { - for (unsigned int j = i + 1; j < end; j++) - if (is_consonant (info[j])) { - info[j].indic_position() = POS_FINAL_C; - break; - } - break; - } - /* Handle beginning Ra */ if (has_reph) info[start].indic_position() = POS_RA_TO_BECOME_REPH; @@ -761,12 +706,8 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, { /* * Uniscribe doesn't move the Halant with Left Matra. - * TEST: U+092B,U+093F,U+094DE - * We follow. This is important for the Sinhala - * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA - * where U+0DD9 is a left matra and U+0DCA is the virama. - * We don't want to move the virama with the left matra. - * TEST: U+0D9A,U+0DDA + * TEST: U+092B,U+093F,U+094D + * We follow. */ for (unsigned int j = i; j > start; j--) if (info[j - 1].indic_position() != POS_PRE_M) { @@ -1486,11 +1427,10 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, switch ((hb_tag_t) plan->props.script) { case HB_SCRIPT_TAMIL: - case HB_SCRIPT_SINHALA: break; default: - /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala. + /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil. * This means, half forms are submerged into the main consonant's cluster. * This is unnecessary, and makes cursor positioning harder, but that's what * Uniscribe does. */ @@ -1560,48 +1500,6 @@ decompose_indic (const hb_ot_shape_normalize_context_t *c, #endif } - if ((ab == 0x0DDAu || hb_in_range (ab, 0x0DDCu, 0x0DDEu))) - { - /* - * Sinhala split matras... Let the fun begin. - * - * These four characters have Unicode decompositions. However, Uniscribe - * decomposes them "Khmer-style", that is, it uses the character itself to - * get the second half. The first half of all four decompositions is always - * U+0DD9. - * - * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are - * broken with Uniscribe. But we need to support them. As such, we only - * do the Uniscribe-style decomposition if the character is transformed into - * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to - * Unicode decomposition. - * - * Note that we can't unconditionally use Unicode decomposition. That would - * break some other fonts, that are designed to work with Uniscribe, and - * don't have positioning features for the Unicode-style decomposition. - * - * Argh... - * - * The Uniscribe behavior is now documented in the newly published Sinhala - * spec in 2012: - * - * https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping - */ - - - const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; - hb_codepoint_t glyph; - if (indic_plan->uniscribe_bug_compatible || - (c->font->get_nominal_glyph (ab, &glyph) && - indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) - { - /* Ok, safe to use Uniscribe-style decomposition. */ - *a = 0x0DD9u; - *b = ab; - return true; - } - } - return (bool) c->unicode->decompose (ab, a, b); } diff --git a/src/hb-ot-shaper-indic.hh b/src/hb-ot-shaper-indic.hh index 15dc595d8..4f822c26e 100644 --- a/src/hb-ot-shaper-indic.hh +++ b/src/hb-ot-shaper-indic.hh @@ -53,10 +53,9 @@ enum ot_position_t { POS_POST_C = 11, POS_AFTER_POST = 12, - POS_FINAL_C = 13, - POS_SMVD = 14, + POS_SMVD = 13, - POS_END = 15 + POS_END = 14 };