[indic-table] Fix block headers

This commit is contained in:
David Corbett 2022-06-24 20:37:01 -04:00 committed by Behdad Esfahbod
parent e35cfb4bde
commit 0f15cb12de
2 changed files with 28 additions and 37 deletions

View File

@ -41,7 +41,7 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]]
headers = [[f.readline () for i in range (2)] for f in files] headers = [[f.readline () for i in range (2)] for f in files]
data = [{} for _ in files] unicode_data = [{} for _ in files]
for i, f in enumerate (files): for i, f in enumerate (files):
for line in f: for line in f:
@ -63,12 +63,12 @@ for i, f in enumerate (files):
t = fields[1] t = fields[1]
for u in range (start, end + 1): for u in range (start, end + 1):
data[i][u] = t unicode_data[i][u] = t
# Merge data into one dict: # Merge data into one dict:
defaults = ('Other', 'Not_Applicable', 'No_Block') defaults = ('Other', 'Not_Applicable', 'No_Block')
combined = {} combined = {}
for i,d in enumerate (data): for i,d in enumerate (unicode_data):
for u,v in d.items (): for u,v in d.items ():
if i == 2 and not u in combined: if i == 2 and not u in combined:
continue continue
@ -76,8 +76,6 @@ for i,d in enumerate (data):
combined[u] = list (defaults) combined[u] = list (defaults)
combined[u][i] = v combined[u][i] = v
combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS} combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
data = combined
del combined
# Convert categories & positions types # Convert categories & positions types
@ -434,30 +432,29 @@ def position_to_category(pos):
defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2]) defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2])
new_data = {} indic_data = {}
for k, (cat, pos, block) in data.items(): for k, (cat, pos, block) in combined.items():
cat = category_map[cat] cat = category_map[cat]
pos = position_map[pos] pos = position_map[pos]
new_data[k] = (cat, pos, block) indic_data[k] = (cat, pos, block)
data = new_data
for k,new_cat in category_overrides.items(): for k,new_cat in category_overrides.items():
(cat, pos, block) = data.get(k, defaults) (cat, pos, _) = indic_data.get(k, defaults)
data[k] = (new_cat, pos, block) indic_data[k] = (new_cat, pos, unicode_data[2][k])
# We only expect position for certain types # We only expect position for certain types
positioned_categories = ('CM', 'SM', 'RS', 'H', 'M') positioned_categories = ('CM', 'SM', 'RS', 'H', 'M')
for k, (cat, pos, block) in data.items(): for k, (cat, pos, block) in indic_data.items():
if cat not in positioned_categories: if cat not in positioned_categories:
pos = 'END' pos = 'END'
data[k] = (cat, pos, block) indic_data[k] = (cat, pos, block)
# Position overrides are more complicated # Position overrides are more complicated
# Keep in sync with CONSONANT_FLAGS in the shaper # Keep in sync with CONSONANT_FLAGS in the shaper
consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE') consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE')
smvd_categories = ('SM', 'VD', 'A', 'Symbol') smvd_categories = ('SM', 'VD', 'A', 'Symbol')
for k, (cat, pos, block) in data.items(): for k, (cat, pos, block) in indic_data.items():
if cat in consonant_categories: if cat in consonant_categories:
pos = 'BASE_C' pos = 'BASE_C'
elif cat == 'M': elif cat == 'M':
@ -467,15 +464,15 @@ for k, (cat, pos, block) in data.items():
pos = indic_matra_position(u, pos, block) pos = indic_matra_position(u, pos, block)
elif cat in smvd_categories: elif cat in smvd_categories:
pos = 'SMVD'; pos = 'SMVD';
data[k] = (cat, pos, block) indic_data[k] = (cat, pos, block)
for k,new_pos in position_overrides.items(): for k,new_pos in position_overrides.items():
(cat, pos, block) = data.get(k, defaults) (cat, pos, _) = indic_data.get(k, defaults)
data[k] = (cat, new_pos, block) indic_data[k] = (cat, new_pos, unicode_data[2][k])
values = [{_: 1} for _ in defaults] values = [{_: 1} for _ in defaults]
for vv in data.values(): for vv in indic_data.values():
for i,v in enumerate(vv): for i,v in enumerate(vv):
values[i][v] = values[i].get (v, 0) + 1 values[i][v] = values[i].get (v, 0) + 1
@ -485,8 +482,8 @@ for vv in data.values():
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {} singles = {}
for u in ALLOWED_SINGLES: for u in ALLOWED_SINGLES:
singles[u] = data[u] singles[u] = indic_data[u]
del data[u] del indic_data[u]
print ("/* == Start of generated table == */") print ("/* == Start of generated table == */")
print ("/*") print ("/*")
@ -620,7 +617,7 @@ def print_block (block, start, end, data):
if block: if block:
last_block = block last_block = block
uu = sorted (data.keys ()) uu = sorted (indic_data)
last = -100000 last = -100000
num = 0 num = 0
@ -631,17 +628,17 @@ print ("static const uint16_t indic_table[] = {")
for u in uu: for u in uu:
if u <= last: if u <= last:
continue continue
block = data[u][2] block = indic_data[u][2]
start = u//8*8 start = u//8*8
end = start+1 end = start+1
while end in uu and block == data[end][2]: while end in uu and block == indic_data[end][2]:
end += 1 end += 1
end = (end-1)//8*8 + 7 end = (end-1)//8*8 + 7
if start != last + 1: if start != last + 1:
if start - last <= 1+16*3: if start - last <= 1+16*3:
print_block (None, last+1, start-1, data) print_block (None, last+1, start-1, indic_data)
else: else:
if last >= 0: if last >= 0:
ends.append (last + 1) ends.append (last + 1)
@ -651,7 +648,7 @@ for u in uu:
print ("#define indic_offset_0x%04xu %d" % (start, offset)) print ("#define indic_offset_0x%04xu %d" % (start, offset))
starts.append (start) starts.append (start)
print_block (block, start, end, data) print_block (block, start, end, indic_data)
last = end last = end
ends.append (last + 1) ends.append (last + 1)
offset += ends[-1] - starts[-1] offset += ends[-1] - starts[-1]

View File

@ -63,7 +63,7 @@ static_assert (OT_VPst == M_Cat(VPst), "");
#define OT_IV M_Cat(IV) #define OT_IV M_Cat(IV)
#define OT_As M_Cat(As) #define OT_As M_Cat(As)
#define OT_DB M_Cat(DB) #define OT_DB M_Cat(DB)
#define OT_GB M_Cat(GB) #define OT_GB M_Cat(GB)
#define OT_MH M_Cat(MH) #define OT_MH M_Cat(MH)
#define OT_MR M_Cat(MR) #define OT_MR M_Cat(MR)
#define OT_MW M_Cat(MW) #define OT_MW M_Cat(MW)
@ -396,13 +396,7 @@ static const uint16_t indic_table[] = {
/* 1CD8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), /* 1CD8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
/* 1CE0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), /* 1CE0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM),
/* 1CE8 */ _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(A,SM), _(S,SM), _(S,SM), /* 1CE8 */ _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(A,SM), _(S,SM), _(S,SM),
/* No_Block */
/* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM), /* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM),
/* Vedic Extensions */
/* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x2008u 1656 #define indic_offset_0x2008u 1656
@ -413,9 +407,6 @@ static const uint16_t indic_table[] = {
/* 2008 */ _(X,X), _(X,X), _(X,X), _(X,X),_(ZWNJ,X),_(ZWJ,X), _(X,X), _(X,X), /* 2008 */ _(X,X), _(X,X), _(X,X), _(X,X),_(ZWNJ,X),_(ZWJ,X), _(X,X), _(X,X),
/* 2010 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), _(X,X), /* 2010 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), _(X,X),
/* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* No_Block */
/* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
#define indic_offset_0x2070u 1688 #define indic_offset_0x2070u 1688
@ -430,7 +421,7 @@ static const uint16_t indic_table[] = {
#define indic_offset_0x25f8u 1712 #define indic_offset_0x25f8u 1712
/* No_Block */ /* Geometric Shapes */
/* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), /* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X),
@ -467,13 +458,16 @@ static const uint16_t indic_table[] = {
#define indic_offset_0xfe00u 1816 #define indic_offset_0xfe00u 1816
/* No_Block */ /* Variation Selectors */
/* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), /* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
/* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), /* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X),
#define indic_offset_0x11300u 1832 #define indic_offset_0x11300u 1832
/* Grantha */
/* 11300 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X), /* 11300 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X),
/* 11308 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 11308 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),
/* 11310 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* 11310 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X),