diff --git a/src/gen-use-table.py b/src/gen-use-table.py index 9c7e6f489..2afbc760e 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -8,7 +8,7 @@ if len (sys.argv) != 5: print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr) sys.exit (1) -BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"] +BLACKLISTED_BLOCKS = ["Thai", "Lao"] files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]] @@ -307,11 +307,28 @@ def map_to_use(data): # Resolve Indic_Syllabic_Category - # TODO: These don't have UISC assigned in Unicode 8.0, but - # have UIPC + # TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC if U == 0x17DD: UISC = Vowel_Dependent if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark + # Tibetan: + # TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC + if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent + if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark + # Overrides to allow NFC order matching syllable + # https://github.com/harfbuzz/harfbuzz/issues/1012 + if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC): + if UIPC == Top: + UIPC = Bottom + + # TODO: https://github.com/harfbuzz/harfbuzz/pull/982 + # also https://github.com/harfbuzz/harfbuzz/issues/1012 + if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC): + if UIPC == Top: + UIPC = Bottom + elif UIPC == Bottom: + UIPC = Top + # TODO: https://github.com/harfbuzz/harfbuzz/pull/627 if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom @@ -359,13 +376,6 @@ def map_to_use(data): # https://github.com/roozbehp/unicode-data/issues/8 if U == 0x0A51: UIPC = Bottom - # TODO: https://github.com/harfbuzz/harfbuzz/pull/982 - if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC): - if UIPC == Top: - UIPC = Bottom - elif UIPC == Bottom: - UIPC = Top - assert (UIPC in [Not_Applicable, Visual_Order_Left] or USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC) diff --git a/src/hb-ot-shape-complex-use-table.cc b/src/hb-ot-shape-complex-use-table.cc index d4237b0a1..e9d969fb3 100644 --- a/src/hb-ot-shape-complex-use-table.cc +++ b/src/hb-ot-shape-complex-use-table.cc @@ -194,7 +194,24 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, /* 0DF0 */ O, O, VPst, VPst, O, O, O, O, -#define use_offset_0x1000u 1360 +#define use_offset_0x0f18u 1360 + + + /* Tibetan */ + VBlw, VBlw, O, O, O, O, O, O, + /* 0F20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0F30 */ B, B, B, B, O, FM, O, FM, O, CMAbv, O, O, O, O, VPst, VPre, + /* 0F40 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B, + /* 0F50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0F60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, + /* 0F70 */ O, VBlw, VBlw, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VMAbv, VMPst, + /* 0F80 */ VBlw, VAbv, VMAbv, VMAbv, VBlw, IND, VMAbv, VMAbv, B, B, B, B, B, SUB, SUB, SUB, + /* 0F90 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 0FA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O, + /* 0FC0 */ O, O, O, O, O, O, FM, O, + +#define use_offset_0x1000u 1536 /* Myanmar */ @@ -210,7 +227,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst, /* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O, -#define use_offset_0x1700u 1520 +#define use_offset_0x1700u 1696 /* Tagalog */ @@ -243,7 +260,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 17D0 */ FM, VAbv, H, FM, O, O, O, O, O, O, O, O, B, VAbv, O, O, /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1900u 1760 +#define use_offset_0x1900u 1936 /* Limbu */ @@ -287,7 +304,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, /* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1b00u 2176 +#define use_offset_0x1b00u 2352 /* Balinese */ @@ -323,7 +340,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FM, CMBlw, O, O, O, O, O, O, O, O, /* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B, -#define use_offset_0x1cd0u 2512 +#define use_offset_0x1cd0u 2688 /* Vedic Extensions */ @@ -332,20 +349,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O, /* 1CF0 */ O, O, VMPst, VMPst, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, O, O, O, O, O, O, -#define use_offset_0x1df8u 2560 +#define use_offset_0x1df8u 2736 /* Combining Diacritical Marks Supplement */ O, O, O, FM, O, O, O, O, -#define use_offset_0x2008u 2568 +#define use_offset_0x2008u 2744 /* General Punctuation */ O, O, O, O, ZWNJ, ZWJ, O, O, /* 2010 */ GB, GB, GB, GB, GB, O, O, O, -#define use_offset_0x2060u 2584 +#define use_offset_0x2060u 2760 /* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, @@ -354,20 +371,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 2070 */ O, O, O, O, FM, O, O, O, O, O, O, O, O, O, O, O, /* 2080 */ O, O, FM, FM, FM, O, O, O, -#define use_offset_0x20f0u 2624 +#define use_offset_0x20f0u 2800 /* Combining Diacritical Marks for Symbols */ /* 20F0 */ VMAbv, O, O, O, O, O, O, O, -#define use_offset_0x25c8u 2632 +#define use_offset_0x25c8u 2808 /* Geometric Shapes */ O, O, O, O, GB, O, O, O, -#define use_offset_0xa800u 2640 +#define use_offset_0xa800u 2816 /* Syloti Nagri */ @@ -454,7 +471,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst, /* AAF0 */ O, O, O, O, O, VMPst, H, O, -#define use_offset_0xabc0u 3400 +#define use_offset_0xabc0u 3576 /* Meetei Mayek */ @@ -464,14 +481,14 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O, /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0xfe00u 3464 +#define use_offset_0xfe00u 3640 /* Variation Selectors */ /* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, -#define use_offset_0x10a00u 3480 +#define use_offset_0x10a00u 3656 /* Kharoshthi */ @@ -482,7 +499,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H, /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, -#define use_offset_0x11000u 3560 +#define use_offset_0x11000u 3736 /* Brahmi */ @@ -503,7 +520,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11100u 3752 +#define use_offset_0x11100u 3928 /* Chakma */ @@ -511,7 +528,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11100 */ VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11120 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VBlw, VAbv, VAbv, - /* 11130 */ VBlw, VAbv, VAbv, H, CMAbv, O, B, B, B, B, B, B, B, B, B, B, + /* 11130 */ VBlw, VAbv, VAbv, H, CMBlw, O, B, B, B, B, B, B, B, B, B, B, /* 11140 */ O, O, O, O, B, VPst, VPst, O, O, O, O, O, O, O, O, O, /* Mahajani */ @@ -541,7 +558,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw, /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O, -#define use_offset_0x11280u 4072 +#define use_offset_0x11280u 4248 /* Multani */ @@ -569,7 +586,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, -#define use_offset_0x11400u 4320 +#define use_offset_0x11400u 4496 /* Newa */ @@ -592,7 +609,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 114C0 */ VMAbv, VMPst, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O, /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11580u 4544 +#define use_offset_0x11580u 4720 /* Siddham */ @@ -635,7 +652,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O, /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, -#define use_offset_0x11800u 4992 +#define use_offset_0x11800u 5168 /* Dogra */ @@ -645,7 +662,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw, /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11a00u 5056 +#define use_offset_0x11a00u 5232 /* Zanabazar Square */ @@ -664,7 +681,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11A80 */ B, B, B, B, O, O, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O, -#define use_offset_0x11c00u 5216 +#define use_offset_0x11c00u 5392 /* Bhaiksuki */ @@ -685,7 +702,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O, -#define use_offset_0x11d00u 5400 +#define use_offset_0x11d00u 5576 /* Masaram Gondi */ @@ -705,7 +722,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O, /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11ee0u 5576 +#define use_offset_0x11ee0u 5752 /* Makasar */ @@ -713,7 +730,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O, -}; /* Table items: 5600; occupancy: 73% */ +}; /* Table items: 5776; occupancy: 74% */ USE_TABLE_ELEMENT_TYPE hb_use_get_category (hb_codepoint_t u) @@ -725,6 +742,7 @@ hb_use_get_category (hb_codepoint_t u) if (hb_in_range (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u]; if (hb_in_range (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u]; if (hb_in_range (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u]; + if (hb_in_range (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u]; break; case 0x1u: diff --git a/src/hb-ot-shape-complex.hh b/src/hb-ot-shape-complex.hh index c18f1b96c..ce0191e8b 100644 --- a/src/hb-ot-shape-complex.hh +++ b/src/hb-ot-shape-complex.hh @@ -232,12 +232,6 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) return &_hb_ot_complex_shaper_hangul; - /* Unicode-2.0 additions */ - case HB_SCRIPT_TIBETAN: - - return &_hb_ot_complex_shaper_tibetan; - - /* Unicode-1.1 additions */ case HB_SCRIPT_HEBREW: @@ -289,7 +283,7 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) /* Unicode-2.0 additions */ - //case HB_SCRIPT_TIBETAN: + case HB_SCRIPT_TIBETAN: /* Unicode-3.0 additions */ //case HB_SCRIPT_MONGOLIAN: