[tibetan] Route Tibetan through USE

Fixes https://github.com/harfbuzz/harfbuzz/pull/933
https://github.com/harfbuzz/harfbuzz/issues/1012

Tibetan test failures go from 0 to 2:

TIBETAN: 208467 out of 208469 tests passed. 2 failed (0.000959375%)
This commit is contained in:
Behdad Esfahbod 2018-10-02 18:43:29 +02:00
parent 77792187be
commit 32a438166f
3 changed files with 65 additions and 43 deletions

View File

@ -8,7 +8,7 @@ if len (sys.argv) != 5:
print ("usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt", file=sys.stderr)
sys.exit (1)
BLACKLISTED_BLOCKS = ["Thai", "Lao", "Tibetan"]
BLACKLISTED_BLOCKS = ["Thai", "Lao"]
files = [io.open (x, encoding='utf-8') for x in sys.argv[1:]]
@ -307,11 +307,28 @@ def map_to_use(data):
# Resolve Indic_Syllabic_Category
# TODO: These don't have UISC assigned in Unicode 8.0, but
# have UIPC
# TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
if U == 0x17DD: UISC = Vowel_Dependent
if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
# Tibetan:
# TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
# Overrides to allow NFC order matching syllable
# https://github.com/harfbuzz/harfbuzz/issues/1012
if UBlock == 'Tibetan' and is_VOWEL (U, UISC, UGC):
if UIPC == Top:
UIPC = Bottom
# TODO: https://github.com/harfbuzz/harfbuzz/pull/982
# also https://github.com/harfbuzz/harfbuzz/issues/1012
if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
if UIPC == Top:
UIPC = Bottom
elif UIPC == Bottom:
UIPC = Top
# TODO: https://github.com/harfbuzz/harfbuzz/pull/627
if 0x1BF2 <= U <= 0x1BF3: UISC = Nukta; UIPC = Bottom
@ -359,13 +376,6 @@ def map_to_use(data):
# https://github.com/roozbehp/unicode-data/issues/8
if U == 0x0A51: UIPC = Bottom
# TODO: https://github.com/harfbuzz/harfbuzz/pull/982
if UBlock == 'Chakma' and is_VOWEL (U, UISC, UGC):
if UIPC == Top:
UIPC = Bottom
elif UIPC == Bottom:
UIPC = Top
assert (UIPC in [Not_Applicable, Visual_Order_Left] or
USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)

View File

@ -194,7 +194,24 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
/* 0DF0 */ O, O, VPst, VPst, O, O, O, O,
#define use_offset_0x1000u 1360
#define use_offset_0x0f18u 1360
/* Tibetan */
VBlw, VBlw, O, O, O, O, O, O,
/* 0F20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 0F30 */ B, B, B, B, O, FM, O, FM, O, CMAbv, O, O, O, O, VPst, VPre,
/* 0F40 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B,
/* 0F50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 0F60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O,
/* 0F70 */ O, VBlw, VBlw, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VMAbv, VMPst,
/* 0F80 */ VBlw, VAbv, VMAbv, VMAbv, VBlw, IND, VMAbv, VMAbv, B, B, B, B, B, SUB, SUB, SUB,
/* 0F90 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 0FA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O,
/* 0FC0 */ O, O, O, O, O, O, FM, O,
#define use_offset_0x1000u 1536
/* Myanmar */
@ -210,7 +227,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst,
/* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O,
#define use_offset_0x1700u 1520
#define use_offset_0x1700u 1696
/* Tagalog */
@ -243,7 +260,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 17D0 */ FM, VAbv, H, FM, O, O, O, O, O, O, O, O, B, VAbv, O, O,
/* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1900u 1760
#define use_offset_0x1900u 1936
/* Limbu */
@ -287,7 +304,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
/* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1b00u 2176
#define use_offset_0x1b00u 2352
/* Balinese */
@ -323,7 +340,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FM, CMBlw, O, O, O, O, O, O, O, O,
/* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B,
#define use_offset_0x1cd0u 2512
#define use_offset_0x1cd0u 2688
/* Vedic Extensions */
@ -332,20 +349,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O,
/* 1CF0 */ O, O, VMPst, VMPst, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, O, O, O, O, O, O,
#define use_offset_0x1df8u 2560
#define use_offset_0x1df8u 2736
/* Combining Diacritical Marks Supplement */
O, O, O, FM, O, O, O, O,
#define use_offset_0x2008u 2568
#define use_offset_0x2008u 2744
/* General Punctuation */
O, O, O, O, ZWNJ, ZWJ, O, O,
/* 2010 */ GB, GB, GB, GB, GB, O, O, O,
#define use_offset_0x2060u 2584
#define use_offset_0x2060u 2760
/* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
@ -354,20 +371,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 2070 */ O, O, O, O, FM, O, O, O, O, O, O, O, O, O, O, O,
/* 2080 */ O, O, FM, FM, FM, O, O, O,
#define use_offset_0x20f0u 2624
#define use_offset_0x20f0u 2800
/* Combining Diacritical Marks for Symbols */
/* 20F0 */ VMAbv, O, O, O, O, O, O, O,
#define use_offset_0x25c8u 2632
#define use_offset_0x25c8u 2808
/* Geometric Shapes */
O, O, O, O, GB, O, O, O,
#define use_offset_0xa800u 2640
#define use_offset_0xa800u 2816
/* Syloti Nagri */
@ -454,7 +471,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst,
/* AAF0 */ O, O, O, O, O, VMPst, H, O,
#define use_offset_0xabc0u 3400
#define use_offset_0xabc0u 3576
/* Meetei Mayek */
@ -464,14 +481,14 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O,
/* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0xfe00u 3464
#define use_offset_0xfe00u 3640
/* Variation Selectors */
/* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS,
#define use_offset_0x10a00u 3480
#define use_offset_0x10a00u 3656
/* Kharoshthi */
@ -482,7 +499,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H,
/* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
#define use_offset_0x11000u 3560
#define use_offset_0x11000u 3736
/* Brahmi */
@ -503,7 +520,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11100u 3752
#define use_offset_0x11100u 3928
/* Chakma */
@ -511,7 +528,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11100 */ VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11120 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VBlw, VAbv, VAbv,
/* 11130 */ VBlw, VAbv, VAbv, H, CMAbv, O, B, B, B, B, B, B, B, B, B, B,
/* 11130 */ VBlw, VAbv, VAbv, H, CMBlw, O, B, B, B, B, B, B, B, B, B, B,
/* 11140 */ O, O, O, O, B, VPst, VPst, O, O, O, O, O, O, O, O, O,
/* Mahajani */
@ -541,7 +558,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
/* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
#define use_offset_0x11280u 4072
#define use_offset_0x11280u 4248
/* Multani */
@ -569,7 +586,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
/* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
#define use_offset_0x11400u 4320
#define use_offset_0x11400u 4496
/* Newa */
@ -592,7 +609,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 114C0 */ VMAbv, VMPst, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
/* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11580u 4544
#define use_offset_0x11580u 4720
/* Siddham */
@ -635,7 +652,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O,
/* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
#define use_offset_0x11800u 4992
#define use_offset_0x11800u 5168
/* Dogra */
@ -645,7 +662,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
/* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11a00u 5056
#define use_offset_0x11a00u 5232
/* Zanabazar Square */
@ -664,7 +681,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11A80 */ B, B, B, B, O, O, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
/* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
#define use_offset_0x11c00u 5216
#define use_offset_0x11c00u 5392
/* Bhaiksuki */
@ -685,7 +702,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
#define use_offset_0x11d00u 5400
#define use_offset_0x11d00u 5576
/* Masaram Gondi */
@ -705,7 +722,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
/* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11ee0u 5576
#define use_offset_0x11ee0u 5752
/* Makasar */
@ -713,7 +730,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
}; /* Table items: 5600; occupancy: 73% */
}; /* Table items: 5776; occupancy: 74% */
USE_TABLE_ELEMENT_TYPE
hb_use_get_category (hb_codepoint_t u)
@ -725,6 +742,7 @@ hb_use_get_category (hb_codepoint_t u)
if (hb_in_range<hb_codepoint_t> (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u];
if (hb_in_range<hb_codepoint_t> (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u];
if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u];
if (hb_in_range<hb_codepoint_t> (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u];
break;
case 0x1u:

View File

@ -232,12 +232,6 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
return &_hb_ot_complex_shaper_hangul;
/* Unicode-2.0 additions */
case HB_SCRIPT_TIBETAN:
return &_hb_ot_complex_shaper_tibetan;
/* Unicode-1.1 additions */
case HB_SCRIPT_HEBREW:
@ -289,7 +283,7 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
/* Unicode-2.0 additions */
//case HB_SCRIPT_TIBETAN:
case HB_SCRIPT_TIBETAN:
/* Unicode-3.0 additions */
//case HB_SCRIPT_MONGOLIAN: