From 0792690b73010e24848fe82d5983ef01f38ed1fb Mon Sep 17 00:00:00 2001 From: David Corbett Date: Fri, 16 Oct 2020 22:41:12 -0400 Subject: [PATCH] [use] Skip WJ and ZWJ when clustering --- src/gen-use-table.py | 13 +- src/hb-ot-shape-complex-use-machine.hh | 598 ++++++++---------- src/hb-ot-shape-complex-use-machine.rl | 9 +- src/hb-ot-shape-complex-use-table.cc | 8 +- src/hb-ot-shape-complex-use.hh | 2 - .../data/in-house/tests/use-syllable.tests | 2 + 6 files changed, 281 insertions(+), 351 deletions(-) diff --git a/src/gen-use-table.py b/src/gen-use-table.py index ddc064696..1605a6a09 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -243,7 +243,7 @@ def is_BASE(U, UISC, UGC, AJT): Vowel_Independent, ] or # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/484 - AJT in [jt_C, jt_D, jt_L, jt_R] and not is_ZWJ(U, UISC, UGC, AJT) or + AJT in [jt_C, jt_D, jt_L, jt_R] and UISC != Joiner or (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial, Consonant_Subjoined, Vowel, Vowel_Dependent])) def is_BASE_IND(U, UISC, UGC, AJT): @@ -290,16 +290,11 @@ def is_HIEROGLYPH_SEGMENT_END(U, UISC, UGC, AJT): return UISC == Hieroglyph_Segment_End def is_ZWNJ(U, UISC, UGC, AJT): return UISC == Non_Joiner -def is_ZWJ(U, UISC, UGC, AJT): - return UISC == Joiner -def is_Word_Joiner(U, UISC, UGC, AJT): - return U == 0x2060 def is_OTHER(U, UISC, UGC, AJT): - return (UISC == Other + return (UISC in [Joiner, Other] and not is_BASE(U, UISC, UGC, AJT) and not is_SYM(U, UISC, UGC, AJT) and not is_SYM_MOD(U, UISC, UGC, AJT) - and not is_Word_Joiner(U, UISC, UGC, AJT) ) def is_Reserved(U, UISC, UGC, AJT): return UGC == 'Cn' @@ -321,7 +316,7 @@ def is_VOWEL_MOD(U, UISC, UGC, AJT): return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or (UGC != Lo and (UISC == Bindu or U in [0xAA29]))) -# CGJ and VS are handled in find_syllables +# CGJ, VS, WJ, and ZWJ are handled in find_syllables use_mapping = { 'B': is_BASE, 'IND': is_BASE_IND, @@ -341,8 +336,6 @@ use_mapping = { 'SB': is_HIEROGLYPH_SEGMENT_BEGIN, 'SE': is_HIEROGLYPH_SEGMENT_END, 'ZWNJ': is_ZWNJ, - 'ZWJ': is_ZWJ, - 'WJ': is_Word_Joiner, 'O': is_OTHER, 'Rsv': is_Reserved, 'R': is_REPHA, diff --git a/src/hb-ot-shape-complex-use-machine.hh b/src/hb-ot-shape-complex-use-machine.hh index d7330039f..b12a50e45 100644 --- a/src/hb-ot-shape-complex-use-machine.hh +++ b/src/hb-ot-shape-complex-use-machine.hh @@ -37,306 +37,278 @@ #line 39 "hb-ot-shape-complex-use-machine.hh" static const unsigned char _use_syllable_machine_trans_keys[] = { - 12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 51u, 11u, 48u, 11u, 48u, - 1u, 15u, 1u, 1u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u, - 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u, 23u, 48u, 23u, 48u, - 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 13u, 13u, 4u, 4u, - 11u, 48u, 41u, 42u, 42u, 42u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, - 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, - 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 1u, 15u, - 4u, 4u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, 42u, 42u, 1u, 5u, - 50u, 52u, 49u, 52u, 49u, 51u, 0 + 1u, 1u, 1u, 1u, 0u, 51u, 11u, 48u, 11u, 48u, 1u, 1u, 22u, 48u, 23u, 48u, + 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, + 1u, 1u, 24u, 48u, 23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, + 11u, 48u, 1u, 48u, 13u, 13u, 4u, 4u, 11u, 48u, 41u, 42u, 42u, 42u, 11u, 48u, + 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, + 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, + 22u, 48u, 11u, 48u, 1u, 48u, 1u, 1u, 4u, 4u, 13u, 13u, 1u, 48u, 11u, 48u, + 41u, 42u, 42u, 42u, 1u, 5u, 50u, 52u, 49u, 52u, 49u, 51u, 0 }; static const char _use_syllable_machine_key_spans[] = { - 37, 15, 1, 37, 1, 52, 38, 38, - 15, 1, 27, 26, 24, 23, 22, 2, - 1, 25, 25, 25, 1, 25, 26, 26, - 26, 27, 27, 27, 38, 48, 1, 1, - 38, 2, 1, 38, 27, 26, 24, 23, - 22, 2, 1, 25, 25, 25, 25, 26, - 26, 26, 27, 27, 27, 38, 48, 15, - 1, 1, 37, 48, 38, 2, 1, 5, - 3, 4, 3 + 1, 1, 52, 38, 38, 1, 27, 26, + 24, 23, 22, 2, 1, 25, 25, 25, + 1, 25, 26, 26, 26, 27, 27, 27, + 38, 48, 1, 1, 38, 2, 1, 38, + 27, 26, 24, 23, 22, 2, 1, 25, + 25, 25, 25, 26, 26, 26, 27, 27, + 27, 38, 48, 1, 1, 1, 48, 38, + 2, 1, 5, 3, 4, 3 }; static const short _use_syllable_machine_index_offsets[] = { - 0, 38, 54, 56, 94, 96, 149, 188, - 227, 243, 245, 273, 300, 325, 349, 372, - 375, 377, 403, 429, 455, 457, 483, 510, - 537, 564, 592, 620, 648, 687, 736, 738, - 740, 779, 782, 784, 823, 851, 878, 903, - 927, 950, 953, 955, 981, 1007, 1033, 1059, - 1086, 1113, 1140, 1168, 1196, 1224, 1263, 1312, - 1328, 1330, 1332, 1370, 1419, 1458, 1461, 1463, - 1469, 1473, 1478 + 0, 2, 4, 57, 96, 135, 137, 165, + 192, 217, 241, 264, 267, 269, 295, 321, + 347, 349, 375, 402, 429, 456, 484, 512, + 540, 579, 628, 630, 632, 671, 674, 676, + 715, 743, 770, 795, 819, 842, 845, 847, + 873, 899, 925, 951, 978, 1005, 1032, 1060, + 1088, 1116, 1155, 1204, 1206, 1208, 1210, 1259, + 1298, 1301, 1303, 1309, 1313, 1318 }; static const char _use_syllable_machine_indicies[] = { - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 1, 0, 3, 2, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 4, 2, 3, 2, - 6, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, - 6, 5, 5, 5, 6, 5, 7, 5, - 8, 9, 10, 8, 11, 12, 10, 10, - 10, 10, 10, 3, 13, 14, 10, 15, - 8, 8, 16, 17, 10, 10, 18, 19, - 20, 21, 22, 23, 24, 18, 25, 26, - 27, 28, 29, 30, 10, 31, 32, 33, - 10, 34, 35, 36, 37, 38, 39, 40, - 13, 41, 10, 42, 10, 44, 1, 43, - 43, 45, 43, 43, 43, 43, 43, 43, - 46, 47, 48, 49, 50, 51, 52, 46, - 53, 9, 54, 55, 56, 57, 43, 58, - 59, 60, 43, 43, 43, 43, 61, 62, - 63, 64, 1, 43, 44, 1, 43, 43, - 45, 43, 43, 43, 43, 43, 43, 46, - 47, 48, 49, 50, 51, 52, 46, 53, - 54, 54, 55, 56, 57, 43, 58, 59, - 60, 43, 43, 43, 43, 61, 62, 63, - 64, 1, 43, 44, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 65, 65, 65, - 65, 66, 65, 44, 65, 46, 47, 48, - 49, 50, 43, 43, 43, 43, 43, 43, - 55, 56, 57, 43, 58, 59, 60, 43, - 43, 43, 43, 47, 62, 63, 64, 67, - 43, 47, 48, 49, 50, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 58, - 59, 60, 43, 43, 43, 43, 43, 62, - 63, 64, 67, 43, 48, 49, 50, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 62, 63, 64, 43, 49, 50, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 62, 63, 64, 43, 50, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 62, 63, 64, 43, 62, 63, 43, 63, - 43, 48, 49, 50, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 58, 59, - 60, 43, 43, 43, 43, 43, 62, 63, - 64, 67, 43, 48, 49, 50, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 59, 60, 43, 43, 43, 43, 43, - 62, 63, 64, 67, 43, 48, 49, 50, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 60, 43, 43, 43, - 43, 43, 62, 63, 64, 67, 43, 69, - 68, 48, 49, 50, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 62, 63, - 64, 67, 43, 47, 48, 49, 50, 43, - 43, 43, 43, 43, 43, 55, 56, 57, - 43, 58, 59, 60, 43, 43, 43, 43, - 47, 62, 63, 64, 67, 43, 47, 48, - 49, 50, 43, 43, 43, 43, 43, 43, - 43, 56, 57, 43, 58, 59, 60, 43, - 43, 43, 43, 47, 62, 63, 64, 67, - 43, 47, 48, 49, 50, 43, 43, 43, - 43, 43, 43, 43, 43, 57, 43, 58, - 59, 60, 43, 43, 43, 43, 47, 62, - 63, 64, 67, 43, 46, 47, 48, 49, - 50, 43, 52, 46, 43, 43, 43, 55, - 56, 57, 43, 58, 59, 60, 43, 43, - 43, 43, 47, 62, 63, 64, 67, 43, - 46, 47, 48, 49, 50, 43, 43, 46, - 43, 43, 43, 55, 56, 57, 43, 58, - 59, 60, 43, 43, 43, 43, 47, 62, - 63, 64, 67, 43, 46, 47, 48, 49, - 50, 51, 52, 46, 43, 43, 43, 55, - 56, 57, 43, 58, 59, 60, 43, 43, - 43, 43, 47, 62, 63, 64, 67, 43, - 44, 1, 43, 43, 45, 43, 43, 43, - 43, 43, 43, 46, 47, 48, 49, 50, - 51, 52, 46, 53, 43, 54, 55, 56, - 57, 43, 58, 59, 60, 43, 43, 43, - 43, 61, 62, 63, 64, 1, 43, 44, - 65, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 65, 65, 66, 65, 65, - 65, 65, 65, 65, 65, 47, 48, 49, - 50, 65, 65, 65, 65, 65, 65, 65, - 65, 65, 65, 58, 59, 60, 65, 65, - 65, 65, 65, 62, 63, 64, 67, 65, - 71, 70, 11, 72, 44, 1, 43, 43, - 45, 43, 43, 43, 43, 43, 43, 46, - 47, 48, 49, 50, 51, 52, 46, 53, - 9, 54, 55, 56, 57, 43, 58, 59, - 60, 43, 17, 73, 43, 61, 62, 63, - 64, 1, 43, 17, 73, 74, 73, 74, - 3, 6, 75, 75, 76, 75, 75, 75, - 75, 75, 75, 18, 19, 20, 21, 22, - 23, 24, 18, 25, 27, 27, 28, 29, - 30, 75, 31, 32, 33, 75, 75, 75, - 75, 37, 38, 39, 40, 6, 75, 18, - 19, 20, 21, 22, 75, 75, 75, 75, - 75, 75, 28, 29, 30, 75, 31, 32, - 33, 75, 75, 75, 75, 19, 38, 39, - 40, 77, 75, 19, 20, 21, 22, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 31, 32, 33, 75, 75, 75, 75, - 75, 38, 39, 40, 77, 75, 20, 21, - 22, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 38, 39, 40, 75, 21, - 22, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 38, 39, 40, 75, 22, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 38, 39, 40, 75, 38, 39, - 75, 39, 75, 20, 21, 22, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 31, 32, 33, 75, 75, 75, 75, 75, - 38, 39, 40, 77, 75, 20, 21, 22, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 32, 33, 75, 75, 75, - 75, 75, 38, 39, 40, 77, 75, 20, - 21, 22, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 33, 75, - 75, 75, 75, 75, 38, 39, 40, 77, - 75, 20, 21, 22, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 75, 38, 39, - 40, 77, 75, 19, 20, 21, 22, 75, - 75, 75, 75, 75, 75, 28, 29, 30, - 75, 31, 32, 33, 75, 75, 75, 75, - 19, 38, 39, 40, 77, 75, 19, 20, - 21, 22, 75, 75, 75, 75, 75, 75, - 75, 29, 30, 75, 31, 32, 33, 75, - 75, 75, 75, 19, 38, 39, 40, 77, - 75, 19, 20, 21, 22, 75, 75, 75, - 75, 75, 75, 75, 75, 30, 75, 31, - 32, 33, 75, 75, 75, 75, 19, 38, - 39, 40, 77, 75, 18, 19, 20, 21, - 22, 75, 24, 18, 75, 75, 75, 28, - 29, 30, 75, 31, 32, 33, 75, 75, - 75, 75, 19, 38, 39, 40, 77, 75, - 18, 19, 20, 21, 22, 75, 75, 18, - 75, 75, 75, 28, 29, 30, 75, 31, - 32, 33, 75, 75, 75, 75, 19, 38, - 39, 40, 77, 75, 18, 19, 20, 21, - 22, 23, 24, 18, 75, 75, 75, 28, - 29, 30, 75, 31, 32, 33, 75, 75, - 75, 75, 19, 38, 39, 40, 77, 75, - 3, 6, 75, 75, 76, 75, 75, 75, - 75, 75, 75, 18, 19, 20, 21, 22, - 23, 24, 18, 25, 75, 27, 28, 29, - 30, 75, 31, 32, 33, 75, 75, 75, - 75, 37, 38, 39, 40, 6, 75, 3, - 75, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 75, 75, 4, 75, 75, - 75, 75, 75, 75, 75, 19, 20, 21, - 22, 75, 75, 75, 75, 75, 75, 75, - 75, 75, 75, 31, 32, 33, 75, 75, - 75, 75, 75, 38, 39, 40, 77, 75, - 3, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 78, 78, 78, 4, 78, - 79, 75, 14, 75, 6, 78, 78, 78, - 78, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 78, 6, 78, 78, 78, - 6, 78, 9, 75, 75, 75, 9, 75, - 75, 75, 75, 75, 3, 6, 14, 75, - 76, 75, 75, 75, 75, 75, 75, 18, - 19, 20, 21, 22, 23, 24, 18, 25, - 26, 27, 28, 29, 30, 75, 31, 32, - 33, 75, 34, 35, 75, 37, 38, 39, - 40, 6, 75, 3, 6, 75, 75, 76, - 75, 75, 75, 75, 75, 75, 18, 19, - 20, 21, 22, 23, 24, 18, 25, 26, - 27, 28, 29, 30, 75, 31, 32, 33, - 75, 75, 75, 75, 37, 38, 39, 40, - 6, 75, 34, 35, 75, 35, 75, 9, - 78, 78, 78, 9, 78, 81, 80, 41, - 80, 41, 81, 80, 81, 80, 41, 80, - 42, 80, 0 + 1, 0, 2, 0, 3, 4, 5, 3, + 6, 7, 5, 5, 5, 5, 5, 1, + 8, 9, 5, 5, 5, 3, 10, 11, + 5, 5, 12, 13, 14, 15, 16, 17, + 18, 12, 19, 20, 21, 22, 23, 24, + 5, 25, 26, 27, 5, 28, 29, 30, + 31, 32, 33, 34, 8, 35, 5, 36, + 5, 38, 39, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 40, 41, 42, 43, + 44, 45, 46, 40, 47, 4, 48, 49, + 50, 51, 37, 52, 53, 54, 37, 37, + 37, 37, 55, 56, 57, 58, 39, 37, + 38, 39, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 40, 41, 42, 43, 44, + 45, 46, 40, 47, 48, 48, 49, 50, + 51, 37, 52, 53, 54, 37, 37, 37, + 37, 55, 56, 57, 58, 39, 37, 38, + 59, 40, 41, 42, 43, 44, 37, 37, + 37, 37, 37, 37, 49, 50, 51, 37, + 52, 53, 54, 37, 37, 37, 37, 41, + 56, 57, 58, 60, 37, 41, 42, 43, + 44, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 52, 53, 54, 37, 37, + 37, 37, 37, 56, 57, 58, 60, 37, + 42, 43, 44, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 56, 57, 58, + 37, 43, 44, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 56, 57, 58, + 37, 44, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 56, 57, 58, 37, + 56, 57, 37, 57, 37, 42, 43, 44, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 52, 53, 54, 37, 37, 37, + 37, 37, 56, 57, 58, 60, 37, 42, + 43, 44, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 53, 54, 37, + 37, 37, 37, 37, 56, 57, 58, 60, + 37, 42, 43, 44, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, + 54, 37, 37, 37, 37, 37, 56, 57, + 58, 60, 37, 62, 61, 42, 43, 44, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 56, 57, 58, 60, 37, 41, + 42, 43, 44, 37, 37, 37, 37, 37, + 37, 49, 50, 51, 37, 52, 53, 54, + 37, 37, 37, 37, 41, 56, 57, 58, + 60, 37, 41, 42, 43, 44, 37, 37, + 37, 37, 37, 37, 37, 50, 51, 37, + 52, 53, 54, 37, 37, 37, 37, 41, + 56, 57, 58, 60, 37, 41, 42, 43, + 44, 37, 37, 37, 37, 37, 37, 37, + 37, 51, 37, 52, 53, 54, 37, 37, + 37, 37, 41, 56, 57, 58, 60, 37, + 40, 41, 42, 43, 44, 37, 46, 40, + 37, 37, 37, 49, 50, 51, 37, 52, + 53, 54, 37, 37, 37, 37, 41, 56, + 57, 58, 60, 37, 40, 41, 42, 43, + 44, 37, 37, 40, 37, 37, 37, 49, + 50, 51, 37, 52, 53, 54, 37, 37, + 37, 37, 41, 56, 57, 58, 60, 37, + 40, 41, 42, 43, 44, 45, 46, 40, + 37, 37, 37, 49, 50, 51, 37, 52, + 53, 54, 37, 37, 37, 37, 41, 56, + 57, 58, 60, 37, 38, 39, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 40, + 41, 42, 43, 44, 45, 46, 40, 47, + 37, 48, 49, 50, 51, 37, 52, 53, + 54, 37, 37, 37, 37, 55, 56, 57, + 58, 39, 37, 38, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 41, 42, 43, 44, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 52, + 53, 54, 59, 59, 59, 59, 59, 56, + 57, 58, 60, 59, 64, 63, 6, 65, + 38, 39, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 40, 41, 42, 43, 44, + 45, 46, 40, 47, 4, 48, 49, 50, + 51, 37, 52, 53, 54, 37, 11, 66, + 37, 55, 56, 57, 58, 39, 37, 11, + 66, 67, 66, 67, 1, 69, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 12, + 13, 14, 15, 16, 17, 18, 12, 19, + 21, 21, 22, 23, 24, 68, 25, 26, + 27, 68, 68, 68, 68, 31, 32, 33, + 34, 69, 68, 12, 13, 14, 15, 16, + 68, 68, 68, 68, 68, 68, 22, 23, + 24, 68, 25, 26, 27, 68, 68, 68, + 68, 13, 32, 33, 34, 70, 68, 13, + 14, 15, 16, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 25, 26, 27, + 68, 68, 68, 68, 68, 32, 33, 34, + 70, 68, 14, 15, 16, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 32, + 33, 34, 68, 15, 16, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 32, + 33, 34, 68, 16, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 32, 33, + 34, 68, 32, 33, 68, 33, 68, 14, + 15, 16, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 25, 26, 27, 68, + 68, 68, 68, 68, 32, 33, 34, 70, + 68, 14, 15, 16, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 26, + 27, 68, 68, 68, 68, 68, 32, 33, + 34, 70, 68, 14, 15, 16, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 27, 68, 68, 68, 68, 68, + 32, 33, 34, 70, 68, 14, 15, 16, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 32, 33, 34, 70, 68, 13, + 14, 15, 16, 68, 68, 68, 68, 68, + 68, 22, 23, 24, 68, 25, 26, 27, + 68, 68, 68, 68, 13, 32, 33, 34, + 70, 68, 13, 14, 15, 16, 68, 68, + 68, 68, 68, 68, 68, 23, 24, 68, + 25, 26, 27, 68, 68, 68, 68, 13, + 32, 33, 34, 70, 68, 13, 14, 15, + 16, 68, 68, 68, 68, 68, 68, 68, + 68, 24, 68, 25, 26, 27, 68, 68, + 68, 68, 13, 32, 33, 34, 70, 68, + 12, 13, 14, 15, 16, 68, 18, 12, + 68, 68, 68, 22, 23, 24, 68, 25, + 26, 27, 68, 68, 68, 68, 13, 32, + 33, 34, 70, 68, 12, 13, 14, 15, + 16, 68, 68, 12, 68, 68, 68, 22, + 23, 24, 68, 25, 26, 27, 68, 68, + 68, 68, 13, 32, 33, 34, 70, 68, + 12, 13, 14, 15, 16, 17, 18, 12, + 68, 68, 68, 22, 23, 24, 68, 25, + 26, 27, 68, 68, 68, 68, 13, 32, + 33, 34, 70, 68, 1, 69, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 12, + 13, 14, 15, 16, 17, 18, 12, 19, + 68, 21, 22, 23, 24, 68, 25, 26, + 27, 68, 68, 68, 68, 31, 32, 33, + 34, 69, 68, 1, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, + 68, 13, 14, 15, 16, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 25, + 26, 27, 68, 68, 68, 68, 68, 32, + 33, 34, 70, 68, 1, 71, 72, 68, + 9, 68, 4, 68, 68, 68, 4, 68, + 68, 68, 68, 68, 1, 69, 9, 68, + 68, 68, 68, 68, 68, 68, 68, 12, + 13, 14, 15, 16, 17, 18, 12, 19, + 20, 21, 22, 23, 24, 68, 25, 26, + 27, 68, 28, 29, 68, 31, 32, 33, + 34, 69, 68, 1, 69, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 12, 13, + 14, 15, 16, 17, 18, 12, 19, 20, + 21, 22, 23, 24, 68, 25, 26, 27, + 68, 68, 68, 68, 31, 32, 33, 34, + 69, 68, 28, 29, 68, 29, 68, 4, + 71, 71, 71, 4, 71, 74, 73, 35, + 73, 35, 74, 73, 74, 73, 35, 73, + 36, 73, 0 }; static const char _use_syllable_machine_trans_targs[] = { - 5, 8, 5, 35, 2, 5, 1, 46, - 5, 6, 5, 30, 32, 55, 56, 58, - 59, 33, 36, 37, 38, 39, 40, 50, - 51, 52, 60, 53, 47, 48, 49, 43, - 44, 45, 61, 62, 63, 54, 41, 42, - 5, 64, 66, 5, 7, 0, 10, 11, - 12, 13, 14, 25, 26, 27, 28, 22, - 23, 24, 17, 18, 19, 29, 15, 16, - 5, 5, 9, 20, 5, 21, 5, 31, - 5, 34, 5, 5, 3, 4, 5, 57, - 5, 65 + 2, 31, 42, 2, 3, 2, 26, 28, + 51, 52, 54, 29, 32, 33, 34, 35, + 36, 46, 47, 48, 55, 49, 43, 44, + 45, 39, 40, 41, 56, 57, 58, 50, + 37, 38, 2, 59, 61, 2, 4, 5, + 6, 7, 8, 9, 10, 21, 22, 23, + 24, 18, 19, 20, 13, 14, 15, 25, + 11, 12, 2, 2, 16, 2, 17, 2, + 27, 2, 30, 2, 2, 0, 1, 2, + 53, 2, 60 }; static const char _use_syllable_machine_trans_actions[] = { - 1, 0, 2, 3, 0, 4, 0, 5, - 8, 5, 9, 0, 5, 10, 0, 10, - 3, 0, 5, 5, 0, 0, 0, 5, - 5, 5, 3, 3, 5, 5, 5, 5, - 5, 5, 0, 0, 0, 3, 0, 0, - 11, 0, 0, 12, 5, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 5, 0, + 1, 2, 2, 5, 0, 6, 0, 0, + 0, 0, 2, 0, 2, 2, 0, 0, + 0, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 0, 0, 0, 2, + 0, 0, 7, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 13, 14, 0, 0, 15, 0, 16, 0, - 17, 0, 18, 19, 0, 0, 20, 0, - 21, 0 + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 9, 10, 0, 11, 0, 12, + 0, 13, 0, 14, 15, 0, 0, 16, + 0, 17, 0 }; static const char _use_syllable_machine_to_state_actions[] = { - 0, 0, 0, 0, 0, 6, 0, 0, + 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0 + 0, 0, 0, 0, 0, 0 }; static const char _use_syllable_machine_from_state_actions[] = { - 0, 0, 0, 0, 0, 7, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0 + 0, 0, 0, 0, 0, 0 }; static const short _use_syllable_machine_eof_trans[] = { - 1, 3, 3, 6, 6, 0, 44, 44, - 66, 66, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 69, 44, 44, 44, - 44, 44, 44, 44, 44, 66, 71, 73, - 44, 75, 75, 76, 76, 76, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 76, - 76, 76, 76, 76, 76, 76, 76, 79, - 76, 76, 79, 76, 76, 76, 76, 79, - 81, 81, 81 + 1, 1, 0, 38, 38, 60, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 62, 38, 38, 38, 38, 38, 38, 38, + 38, 60, 64, 66, 38, 68, 68, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 72, 69, 69, 69, 69, + 69, 69, 72, 74, 74, 74 }; -static const int use_syllable_machine_start = 5; -static const int use_syllable_machine_first_final = 5; +static const int use_syllable_machine_start = 2; +static const int use_syllable_machine_first_final = 2; static const int use_syllable_machine_error = -1; -static const int use_syllable_machine_en_main = 5; +static const int use_syllable_machine_en_main = 2; #line 39 "hb-ot-shape-complex-use-machine.rl" -#line 166 "hb-ot-shape-complex-use-machine.rl" +#line 163 "hb-ot-shape-complex-use-machine.rl" #define found_syllable(syllable_type) \ @@ -379,7 +351,7 @@ find_syllables_use (hb_buffer_t *buffer) unsigned int act; int cs; -#line 383 "hb-ot-shape-complex-use-machine.hh" +#line 355 "hb-ot-shape-complex-use-machine.hh" { cs = use_syllable_machine_start; ts = 0; @@ -387,12 +359,12 @@ find_syllables_use (hb_buffer_t *buffer) act = 0; } -#line 210 "hb-ot-shape-complex-use-machine.rl" +#line 207 "hb-ot-shape-complex-use-machine.rl" unsigned int syllable_serial = 1; -#line 396 "hb-ot-shape-complex-use-machine.hh" +#line 368 "hb-ot-shape-complex-use-machine.hh" { int _slen; int _trans; @@ -402,11 +374,11 @@ find_syllables_use (hb_buffer_t *buffer) goto _test_eof; _resume: switch ( _use_syllable_machine_from_state_actions[cs] ) { - case 7: + case 4: #line 1 "NONE" {ts = p;} break; -#line 410 "hb-ot-shape-complex-use-machine.hh" +#line 382 "hb-ot-shape-complex-use-machine.hh" } _keys = _use_syllable_machine_trans_keys + (cs<<1); @@ -424,104 +396,76 @@ _eof_trans: goto _again; switch ( _use_syllable_machine_trans_actions[_trans] ) { - case 5: + case 2: #line 1 "NONE" {te = p+1;} break; + case 5: +#line 150 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (independent_cluster); }} + break; + case 9: +#line 153 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (standard_cluster); }} + break; + case 7: +#line 158 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (broken_cluster); }} + break; + case 6: +#line 159 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (non_cluster); }} + break; + case 10: +#line 151 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (virama_terminated_cluster); }} + break; + case 11: +#line 152 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (sakot_terminated_cluster); }} + break; case 8: #line 153 "hb-ot-shape-complex-use-machine.rl" - {te = p+1;{ found_syllable (independent_cluster); }} + {te = p;p--;{ found_syllable (standard_cluster); }} break; case 13: -#line 156 "hb-ot-shape-complex-use-machine.rl" - {te = p+1;{ found_syllable (standard_cluster); }} - break; - case 11: -#line 161 "hb-ot-shape-complex-use-machine.rl" - {te = p+1;{ found_syllable (broken_cluster); }} - break; - case 9: -#line 162 "hb-ot-shape-complex-use-machine.rl" - {te = p+1;{ found_syllable (non_cluster); }} - break; - case 14: #line 154 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (virama_terminated_cluster); }} - break; - case 15: -#line 155 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (sakot_terminated_cluster); }} + {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }} break; case 12: +#line 155 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (numeral_cluster); }} + break; + case 14: #line 156 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (standard_cluster); }} + {te = p;p--;{ found_syllable (symbol_cluster); }} break; case 17: #line 157 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }} - break; - case 16: -#line 158 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (numeral_cluster); }} - break; - case 18: -#line 159 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (symbol_cluster); }} - break; - case 21: -#line 160 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (hieroglyph_cluster); }} break; - case 19: -#line 161 "hb-ot-shape-complex-use-machine.rl" + case 15: +#line 158 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (broken_cluster); }} break; - case 20: -#line 162 "hb-ot-shape-complex-use-machine.rl" + case 16: +#line 159 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (non_cluster); }} break; case 1: -#line 156 "hb-ot-shape-complex-use-machine.rl" - {{p = ((te))-1;}{ found_syllable (standard_cluster); }} - break; - case 4: -#line 161 "hb-ot-shape-complex-use-machine.rl" +#line 158 "hb-ot-shape-complex-use-machine.rl" {{p = ((te))-1;}{ found_syllable (broken_cluster); }} break; - case 2: -#line 1 "NONE" - { switch( act ) { - case 9: - {{p = ((te))-1;} found_syllable (broken_cluster); } - break; - case 10: - {{p = ((te))-1;} found_syllable (non_cluster); } - break; - } - } - break; - case 3: -#line 1 "NONE" - {te = p+1;} -#line 161 "hb-ot-shape-complex-use-machine.rl" - {act = 9;} - break; - case 10: -#line 1 "NONE" - {te = p+1;} -#line 162 "hb-ot-shape-complex-use-machine.rl" - {act = 10;} - break; -#line 516 "hb-ot-shape-complex-use-machine.hh" +#line 460 "hb-ot-shape-complex-use-machine.hh" } _again: switch ( _use_syllable_machine_to_state_actions[cs] ) { - case 6: + case 3: #line 1 "NONE" {ts = 0;} break; -#line 525 "hb-ot-shape-complex-use-machine.hh" +#line 469 "hb-ot-shape-complex-use-machine.hh" } if ( ++p != pe ) @@ -537,7 +481,7 @@ _again: } -#line 215 "hb-ot-shape-complex-use-machine.rl" +#line 212 "hb-ot-shape-complex-use-machine.rl" } diff --git a/src/hb-ot-shape-complex-use-machine.rl b/src/hb-ot-shape-complex-use-machine.rl index df9b63865..06d7c50ff 100644 --- a/src/hb-ot-shape-complex-use-machine.rl +++ b/src/hb-ot-shape-complex-use-machine.rl @@ -57,8 +57,6 @@ H = 12; # HALANT HN = 13; # HALANT_NUM ZWNJ = 14; # Zero width non-joiner -ZWJ = 15; # Zero width joiner -WJ = 16; # Word joiner Rsv = 17; # Reserved characters R = 18; # REPHA S = 19; # SYM @@ -98,8 +96,7 @@ FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable h = H | HVM | Sk; -# Override: Adhoc ZWJ placement. https://github.com/harfbuzz/harfbuzz/issues/542#issuecomment-353169729 -consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) CMAbv? CMBlw*)*; +consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*; medial_consonants = MPre? MAbv? MBlw? MPst?; dependent_vowels = VPre* VAbv* VBlw* VPst*; vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*; @@ -126,7 +123,7 @@ symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; virama_terminated_cluster = complex_syllable_start consonant_modifiers - ZWJ?.h.ZWJ? + h ; sakot_terminated_cluster = complex_syllable_start @@ -146,7 +143,7 @@ number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; numeral_cluster = N numeral_cluster_tail?; symbol_cluster = (S | GB) symbol_cluster_tail?; hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*; -independent_cluster = (IND | O | Rsv | WJ); +independent_cluster = (IND | O | Rsv); other = any; main := |* diff --git a/src/hb-ot-shape-complex-use-table.cc b/src/hb-ot-shape-complex-use-table.cc index 1e05f9a51..b550502a5 100644 --- a/src/hb-ot-shape-complex-use-table.cc +++ b/src/hb-ot-shape-complex-use-table.cc @@ -57,8 +57,6 @@ #define SE USE_SE /* HIEROGLYPH_SEGMENT_END */ #define SUB USE_SUB /* CONS_SUB */ #define Sk USE_Sk /* SAKOT */ -#define WJ USE_WJ /* Word_Joiner */ -#define ZWJ USE_ZWJ /* ZWJ */ #define ZWNJ USE_ZWNJ /* ZWNJ */ #define CMAbv USE_CMAbv #define CMBlw USE_CMBlw @@ -423,12 +421,12 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* General Punctuation */ - O, O, O, O, ZWNJ, ZWJ, O, O, + O, O, O, O, ZWNJ, O, O, O, /* 2010 */ GB, GB, GB, GB, GB, O, O, O, #define use_offset_0x2060u 3064 - /* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 2060 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* Superscripts and Subscripts */ @@ -1180,8 +1178,6 @@ hb_use_get_category (hb_codepoint_t u) #undef SE #undef SUB #undef Sk -#undef WJ -#undef ZWJ #undef ZWNJ #undef CMAbv #undef CMBlw diff --git a/src/hb-ot-shape-complex-use.hh b/src/hb-ot-shape-complex-use.hh index 1ed3b5a85..f7d34abf7 100644 --- a/src/hb-ot-shape-complex-use.hh +++ b/src/hb-ot-shape-complex-use.hh @@ -58,8 +58,6 @@ enum use_category_t { USE_HN = 13, /* HALANT_NUM */ USE_ZWNJ = 14, /* Zero width non-joiner */ - USE_ZWJ = 15, /* Zero width joiner */ - USE_WJ = 16, /* Word joiner */ USE_Rsv = 17, /* Reserved characters */ USE_R = 18, /* REPHA */ USE_S = 19, /* SYM */ diff --git a/test/shaping/data/in-house/tests/use-syllable.tests b/test/shaping/data/in-house/tests/use-syllable.tests index 61f288758..10f9e0659 100644 --- a/test/shaping/data/in-house/tests/use-syllable.tests +++ b/test/shaping/data/in-house/tests/use-syllable.tests @@ -18,3 +18,5 @@ ../fonts/573d3a3177c9a8646e94c8a0d7b224334340946a.ttf:--font-funcs=ft:U+11410,U+11442,U+200C,U+034F,U+11411:[Ga=0+576|Virama=0@70,70+0|Gha=4+566] ../fonts/573d3a3177c9a8646e94c8a0d7b224334340946a.ttf:--font-funcs=ft:U+11410,U+200C,U+11442,U+034F,U+11411:[Ga.icd=0+367|Gha.diag=1@100,0+386] ../fonts/e68a88939e0f06e34d2bc911f09b70890289c8fd.ttf::U+AA00,U+200C,U+AA34:[raMedial_cham_pre=0+400|a_cham=0+1121] +../fonts/2a670df15b73a5dc75a5cc491bde5ac93c5077dc.ttf::U+11124,U+200D,U+11127:[u11124=0+514|u11127=0+0] +../fonts/2a670df15b73a5dc75a5cc491bde5ac93c5077dc.ttf::U+11124,U+2060,U+11127:[u11124=0+514|u11127=1+0]