[indic-table] Fix block headers
This commit is contained in:
parent
e35cfb4bde
commit
0f15cb12de
|
@ -41,7 +41,7 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]]
|
||||||
|
|
||||||
headers = [[f.readline () for i in range (2)] for f in files]
|
headers = [[f.readline () for i in range (2)] for f in files]
|
||||||
|
|
||||||
data = [{} for _ in files]
|
unicode_data = [{} for _ in files]
|
||||||
for i, f in enumerate (files):
|
for i, f in enumerate (files):
|
||||||
for line in f:
|
for line in f:
|
||||||
|
|
||||||
|
@ -63,12 +63,12 @@ for i, f in enumerate (files):
|
||||||
t = fields[1]
|
t = fields[1]
|
||||||
|
|
||||||
for u in range (start, end + 1):
|
for u in range (start, end + 1):
|
||||||
data[i][u] = t
|
unicode_data[i][u] = t
|
||||||
|
|
||||||
# Merge data into one dict:
|
# Merge data into one dict:
|
||||||
defaults = ('Other', 'Not_Applicable', 'No_Block')
|
defaults = ('Other', 'Not_Applicable', 'No_Block')
|
||||||
combined = {}
|
combined = {}
|
||||||
for i,d in enumerate (data):
|
for i,d in enumerate (unicode_data):
|
||||||
for u,v in d.items ():
|
for u,v in d.items ():
|
||||||
if i == 2 and not u in combined:
|
if i == 2 and not u in combined:
|
||||||
continue
|
continue
|
||||||
|
@ -76,8 +76,6 @@ for i,d in enumerate (data):
|
||||||
combined[u] = list (defaults)
|
combined[u] = list (defaults)
|
||||||
combined[u][i] = v
|
combined[u][i] = v
|
||||||
combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
|
combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
|
||||||
data = combined
|
|
||||||
del combined
|
|
||||||
|
|
||||||
|
|
||||||
# Convert categories & positions types
|
# Convert categories & positions types
|
||||||
|
@ -434,30 +432,29 @@ def position_to_category(pos):
|
||||||
|
|
||||||
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])
|
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])
|
||||||
|
|
||||||
new_data = {}
|
indic_data = {}
|
||||||
for k, (cat, pos, block) in data.items():
|
for k, (cat, pos, block) in combined.items():
|
||||||
cat = category_map[cat]
|
cat = category_map[cat]
|
||||||
pos = position_map[pos]
|
pos = position_map[pos]
|
||||||
new_data[k] = (cat, pos, block)
|
indic_data[k] = (cat, pos, block)
|
||||||
data = new_data
|
|
||||||
|
|
||||||
for k,new_cat in category_overrides.items():
|
for k,new_cat in category_overrides.items():
|
||||||
(cat, pos, block) = data.get(k, defaults)
|
(cat, pos, _) = indic_data.get(k, defaults)
|
||||||
data[k] = (new_cat, pos, block)
|
indic_data[k] = (new_cat, pos, unicode_data[2][k])
|
||||||
|
|
||||||
# We only expect position for certain types
|
# We only expect position for certain types
|
||||||
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M')
|
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M')
|
||||||
for k, (cat, pos, block) in data.items():
|
for k, (cat, pos, block) in indic_data.items():
|
||||||
if cat not in positioned_categories:
|
if cat not in positioned_categories:
|
||||||
pos = 'END'
|
pos = 'END'
|
||||||
data[k] = (cat, pos, block)
|
indic_data[k] = (cat, pos, block)
|
||||||
|
|
||||||
# Position overrides are more complicated
|
# Position overrides are more complicated
|
||||||
|
|
||||||
# Keep in sync with CONSONANT_FLAGS in the shaper
|
# Keep in sync with CONSONANT_FLAGS in the shaper
|
||||||
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
|
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
|
||||||
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
|
smvd_categories = ('SM', 'VD', 'A', 'Symbol')
|
||||||
for k, (cat, pos, block) in data.items():
|
for k, (cat, pos, block) in indic_data.items():
|
||||||
if cat in consonant_categories:
|
if cat in consonant_categories:
|
||||||
pos = 'BASE_C'
|
pos = 'BASE_C'
|
||||||
elif cat == 'M':
|
elif cat == 'M':
|
||||||
|
@ -467,15 +464,15 @@ for k, (cat, pos, block) in data.items():
|
||||||
pos = indic_matra_position(u, pos, block)
|
pos = indic_matra_position(u, pos, block)
|
||||||
elif cat in smvd_categories:
|
elif cat in smvd_categories:
|
||||||
pos = 'SMVD';
|
pos = 'SMVD';
|
||||||
data[k] = (cat, pos, block)
|
indic_data[k] = (cat, pos, block)
|
||||||
|
|
||||||
for k,new_pos in position_overrides.items():
|
for k,new_pos in position_overrides.items():
|
||||||
(cat, pos, block) = data.get(k, defaults)
|
(cat, pos, _) = indic_data.get(k, defaults)
|
||||||
data[k] = (cat, new_pos, block)
|
indic_data[k] = (cat, new_pos, unicode_data[2][k])
|
||||||
|
|
||||||
|
|
||||||
values = [{_: 1} for _ in defaults]
|
values = [{_: 1} for _ in defaults]
|
||||||
for vv in data.values():
|
for vv in indic_data.values():
|
||||||
for i,v in enumerate(vv):
|
for i,v in enumerate(vv):
|
||||||
values[i][v] = values[i].get (v, 0) + 1
|
values[i][v] = values[i].get (v, 0) + 1
|
||||||
|
|
||||||
|
@ -485,8 +482,8 @@ for vv in data.values():
|
||||||
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
|
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
|
||||||
singles = {}
|
singles = {}
|
||||||
for u in ALLOWED_SINGLES:
|
for u in ALLOWED_SINGLES:
|
||||||
singles[u] = data[u]
|
singles[u] = indic_data[u]
|
||||||
del data[u]
|
del indic_data[u]
|
||||||
|
|
||||||
print ("/* == Start of generated table == */")
|
print ("/* == Start of generated table == */")
|
||||||
print ("/*")
|
print ("/*")
|
||||||
|
@ -620,7 +617,7 @@ def print_block (block, start, end, data):
|
||||||
if block:
|
if block:
|
||||||
last_block = block
|
last_block = block
|
||||||
|
|
||||||
uu = sorted (data.keys ())
|
uu = sorted (indic_data)
|
||||||
|
|
||||||
last = -100000
|
last = -100000
|
||||||
num = 0
|
num = 0
|
||||||
|
@ -631,17 +628,17 @@ print ("static const uint16_t indic_table[] = {")
|
||||||
for u in uu:
|
for u in uu:
|
||||||
if u <= last:
|
if u <= last:
|
||||||
continue
|
continue
|
||||||
block = data[u][2]
|
block = indic_data[u][2]
|
||||||
|
|
||||||
start = u//8*8
|
start = u//8*8
|
||||||
end = start+1
|
end = start+1
|
||||||
while end in uu and block == data[end][2]:
|
while end in uu and block == indic_data[end][2]:
|
||||||
end += 1
|
end += 1
|
||||||
end = (end-1)//8*8 + 7
|
end = (end-1)//8*8 + 7
|
||||||
|
|
||||||
if start != last + 1:
|
if start != last + 1:
|
||||||
if start - last <= 1+16*3:
|
if start - last <= 1+16*3:
|
||||||
print_block (None, last+1, start-1, data)
|
print_block (None, last+1, start-1, indic_data)
|
||||||
else:
|
else:
|
||||||
if last >= 0:
|
if last >= 0:
|
||||||
ends.append (last + 1)
|
ends.append (last + 1)
|
||||||
|
@ -651,7 +648,7 @@ for u in uu:
|
||||||
print ("#define indic_offset_0x%04xu %d" % (start, offset))
|
print ("#define indic_offset_0x%04xu %d" % (start, offset))
|
||||||
starts.append (start)
|
starts.append (start)
|
||||||
|
|
||||||
print_block (block, start, end, data)
|
print_block (block, start, end, indic_data)
|
||||||
last = end
|
last = end
|
||||||
ends.append (last + 1)
|
ends.append (last + 1)
|
||||||
offset += ends[-1] - starts[-1]
|
offset += ends[-1] - starts[-1]
|
||||||
|
|
|
@ -63,7 +63,7 @@ static_assert (OT_VPst == M_Cat(VPst), "");
|
||||||
#define OT_IV M_Cat(IV)
|
#define OT_IV M_Cat(IV)
|
||||||
#define OT_As M_Cat(As)
|
#define OT_As M_Cat(As)
|
||||||
#define OT_DB M_Cat(DB)
|
#define OT_DB M_Cat(DB)
|
||||||
#define OT_GB M_Cat(GB)
|
#define OT_GB M_Cat(GB)
|
||||||
#define OT_MH M_Cat(MH)
|
#define OT_MH M_Cat(MH)
|
||||||
#define OT_MR M_Cat(MR)
|
#define OT_MR M_Cat(MR)
|
||||||
#define OT_MW M_Cat(MW)
|
#define OT_MW M_Cat(MW)
|
||||||
|
@ -396,13 +396,7 @@ static const uint16_t indic_table[] = {
|
||||||
/* 1CD8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
|
/* 1CD8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
|
||||||
/* 1CE0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
|
/* 1CE0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
|
||||||
/* 1CE8 */ _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(A,SM), _(S,SM), _(S,SM),
|
/* 1CE8 */ _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(A,SM), _(S,SM), _(S,SM),
|
||||||
|
|
||||||
/* No_Block */
|
|
||||||
|
|
||||||
/* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM),
|
/* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM),
|
||||||
|
|
||||||
/* Vedic Extensions */
|
|
||||||
|
|
||||||
/* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
|
|
||||||
#define indic_offset_0x2008u 1656
|
#define indic_offset_0x2008u 1656
|
||||||
|
@ -413,9 +407,6 @@ static const uint16_t indic_table[] = {
|
||||||
/* 2008 */ _(X,X), _(X,X), _(X,X), _(X,X),_(ZWNJ,X),_(ZWJ,X), _(X,X), _(X,X),
|
/* 2008 */ _(X,X), _(X,X), _(X,X), _(X,X),_(ZWNJ,X),_(ZWJ,X), _(X,X), _(X,X),
|
||||||
/* 2010 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), _(X,X),
|
/* 2010 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), _(X,X),
|
||||||
/* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
|
|
||||||
/* No_Block */
|
|
||||||
|
|
||||||
/* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
|
|
||||||
#define indic_offset_0x2070u 1688
|
#define indic_offset_0x2070u 1688
|
||||||
|
@ -430,7 +421,7 @@ static const uint16_t indic_table[] = {
|
||||||
#define indic_offset_0x25f8u 1712
|
#define indic_offset_0x25f8u 1712
|
||||||
|
|
||||||
|
|
||||||
/* No_Block */
|
/* Geometric Shapes */
|
||||||
|
|
||||||
/* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X),
|
/* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X),
|
||||||
|
|
||||||
|
@ -467,13 +458,16 @@ static const uint16_t indic_table[] = {
|
||||||
#define indic_offset_0xfe00u 1816
|
#define indic_offset_0xfe00u 1816
|
||||||
|
|
||||||
|
|
||||||
/* No_Block */
|
/* Variation Selectors */
|
||||||
|
|
||||||
/* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
|
/* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
|
||||||
/* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
|
/* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
|
||||||
|
|
||||||
#define indic_offset_0x11300u 1832
|
#define indic_offset_0x11300u 1832
|
||||||
|
|
||||||
|
|
||||||
|
/* Grantha */
|
||||||
|
|
||||||
/* 11300 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 11300 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
/* 11308 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 11308 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
/* 11310 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
/* 11310 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
|
||||||
|
|
Loading…
Reference in New Issue