Remove obsolete overrides from Indic/USE scripts
This commit is contained in:
parent
b38bab8622
commit
8c42f03215
|
@ -79,10 +79,6 @@ data = combined
|
||||||
del combined
|
del combined
|
||||||
num = len (data)
|
num = len (data)
|
||||||
|
|
||||||
for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]:
|
|
||||||
if data[u][0] == 'Other':
|
|
||||||
data[u][0] = "Vowel_Dependent"
|
|
||||||
|
|
||||||
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
|
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
|
||||||
singles = {}
|
singles = {}
|
||||||
for u in ALLOWED_SINGLES:
|
for u in ALLOWED_SINGLES:
|
||||||
|
|
|
@ -48,7 +48,6 @@ defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')
|
||||||
# TODO Characters that are not in Unicode Indic files, but used in USE
|
# TODO Characters that are not in Unicode Indic files, but used in USE
|
||||||
data[0][0x034F] = defaults[0]
|
data[0][0x034F] = defaults[0]
|
||||||
data[0][0x2060] = defaults[0]
|
data[0][0x2060] = defaults[0]
|
||||||
data[0][0x20F0] = defaults[0]
|
|
||||||
# TODO https://github.com/roozbehp/unicode-data/issues/9
|
# TODO https://github.com/roozbehp/unicode-data/issues/9
|
||||||
data[0][0x11C44] = 'Consonant_Placeholder'
|
data[0][0x11C44] = 'Consonant_Placeholder'
|
||||||
data[0][0x11C45] = 'Consonant_Placeholder'
|
data[0][0x11C45] = 'Consonant_Placeholder'
|
||||||
|
@ -317,12 +316,11 @@ def map_to_use(data):
|
||||||
|
|
||||||
# Resolve Indic_Syllabic_Category
|
# Resolve Indic_Syllabic_Category
|
||||||
|
|
||||||
# TODO: These don't have UISC assigned in Unicode 8.0, but have UIPC
|
# TODO: These don't have UISC assigned in Unicode 12.0, but have UIPC
|
||||||
if U == 0x17DD: UISC = Vowel_Dependent
|
|
||||||
if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
|
if 0x1CE2 <= U <= 0x1CE8: UISC = Cantillation_Mark
|
||||||
|
|
||||||
# Tibetan:
|
# Tibetan:
|
||||||
# TODO: These don't have UISC assigned in Unicode 11.0, but have UIPC
|
# TODO: These don't have UISC assigned in Unicode 12.0, but have UIPC
|
||||||
if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
|
if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F: UISC = Vowel_Dependent
|
||||||
if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
|
if 0x0F86 <= U <= 0x0F87: UISC = Tone_Mark
|
||||||
# Overrides to allow NFC order matching syllable
|
# Overrides to allow NFC order matching syllable
|
||||||
|
@ -347,13 +345,7 @@ def map_to_use(data):
|
||||||
if U == 0x1CED: UISC = Tone_Mark
|
if U == 0x1CED: UISC = Tone_Mark
|
||||||
|
|
||||||
# TODO: https://github.com/harfbuzz/harfbuzz/issues/525
|
# TODO: https://github.com/harfbuzz/harfbuzz/issues/525
|
||||||
if U == 0x1A7F: UISC = Consonant_Final; UIPC = Bottom
|
if U == 0x1A7F: UISC = Consonant_Final
|
||||||
|
|
||||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/609
|
|
||||||
if U == 0x20F0: UISC = Cantillation_Mark; UIPC = Top
|
|
||||||
|
|
||||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/626
|
|
||||||
if U == 0xA8B4: UISC = Consonant_Medial
|
|
||||||
|
|
||||||
# TODO: https://github.com/harfbuzz/harfbuzz/issues/1105
|
# TODO: https://github.com/harfbuzz/harfbuzz/issues/1105
|
||||||
if U == 0x11134: UISC = Gemination_Mark
|
if U == 0x11134: UISC = Gemination_Mark
|
||||||
|
@ -367,26 +359,18 @@ def map_to_use(data):
|
||||||
|
|
||||||
# Resolve Indic_Positional_Category
|
# Resolve Indic_Positional_Category
|
||||||
|
|
||||||
# TODO: Not in Unicode 8.0 yet, but in spec.
|
# TODO: These should die, but have UIPC in Unicode 12.0
|
||||||
if U == 0x1B6C: UIPC = Bottom
|
|
||||||
|
|
||||||
# TODO: These should die, but have UIPC in Unicode 8.0
|
|
||||||
if U in [0x953, 0x954]: UIPC = Not_Applicable
|
if U in [0x953, 0x954]: UIPC = Not_Applicable
|
||||||
|
|
||||||
# TODO: In USE's override list but not in Unicode 11.0
|
# TODO: In USE's override list but not in Unicode 12.0
|
||||||
if U == 0x103C: UIPC = Left
|
if U == 0x103C: UIPC = Left
|
||||||
|
|
||||||
# TODO: These are not in USE's override list that we have, nor are they in Unicode 11.0
|
# TODO: These are not in USE's override list that we have, nor are they in Unicode 12.0
|
||||||
if 0xA926 <= U <= 0xA92A: UIPC = Top
|
if 0xA926 <= U <= 0xA92A: UIPC = Top
|
||||||
if U == 0x111CA: UIPC = Bottom
|
|
||||||
if U == 0x11300: UIPC = Top
|
|
||||||
# TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
|
# TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
|
||||||
if U == 0x11302: UIPC = Top
|
if U == 0x11302: UIPC = Top
|
||||||
if U == 0x1133C: UIPC = Bottom
|
if U == 0x1171E: UIPC = Left
|
||||||
if U == 0x1171E: UIPC = Left # Correct?!
|
|
||||||
if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
|
if 0x1CF8 <= U <= 0x1CF9: UIPC = Top
|
||||||
# https://github.com/roozbehp/unicode-data/issues/8
|
|
||||||
if U == 0x0A51: UIPC = Bottom
|
|
||||||
|
|
||||||
assert (UIPC in [Not_Applicable, Visual_Order_Left] or
|
assert (UIPC in [Not_Applicable, Visual_Order_Left] or
|
||||||
USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)
|
USE in use_positions), "%s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC)
|
||||||
|
|
|
@ -261,7 +261,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
|
||||||
/* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
/* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
|
||||||
/* 17B0 */ B, B, B, B, O, O, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPst, VPst,
|
/* 17B0 */ B, B, B, B, O, O, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPst, VPst,
|
||||||
/* 17C0 */ VPst, VPre, VPre, VPre, VPst, VPst, VMAbv, VMPst, VPst, VMAbv, VMAbv, FM, FAbv, CMAbv, FM, FM,
|
/* 17C0 */ VPst, VPre, VPre, VPre, VPst, VPst, VMAbv, VMPst, VPst, VMAbv, VMAbv, FM, FAbv, CMAbv, FM, FM,
|
||||||
/* 17D0 */ FM, VAbv, H, FM, O, O, O, O, O, O, O, O, B, VAbv, O, O,
|
/* 17D0 */ FM, VAbv, H, FM, O, O, O, O, O, O, O, O, B, FM, O, O,
|
||||||
/* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
/* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
|
||||||
|
|
||||||
#define use_offset_0x1900u 1936
|
#define use_offset_0x1900u 1936
|
||||||
|
|
Loading…
Reference in New Issue