[indic-generator] Move category overrides to generator
This commit is contained in:
parent
58eeb3a180
commit
4907514026
|
@ -42,7 +42,6 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]]
|
||||||
headers = [[f.readline () for i in range (2)] for f in files]
|
headers = [[f.readline () for i in range (2)] for f in files]
|
||||||
|
|
||||||
data = [{} for _ in files]
|
data = [{} for _ in files]
|
||||||
values = [{} for _ in files]
|
|
||||||
for i, f in enumerate (files):
|
for i, f in enumerate (files):
|
||||||
for line in f:
|
for line in f:
|
||||||
|
|
||||||
|
@ -65,12 +64,9 @@ for i, f in enumerate (files):
|
||||||
|
|
||||||
for u in range (start, end + 1):
|
for u in range (start, end + 1):
|
||||||
data[i][u] = t
|
data[i][u] = t
|
||||||
values[i][t] = values[i].get (t, 0) + end - start + 1
|
|
||||||
|
|
||||||
# Merge data into one dict:
|
# Merge data into one dict:
|
||||||
defaults = ('Other', 'Not_Applicable', 'No_Block')
|
defaults = ('Other', 'Not_Applicable', 'No_Block')
|
||||||
for i,v in enumerate (defaults):
|
|
||||||
values[i][v] = values[i].get (v, 0) + 1
|
|
||||||
combined = {}
|
combined = {}
|
||||||
for i,d in enumerate (data):
|
for i,d in enumerate (data):
|
||||||
for u,v in d.items ():
|
for u,v in d.items ():
|
||||||
|
@ -84,7 +80,7 @@ data = combined
|
||||||
del combined
|
del combined
|
||||||
|
|
||||||
|
|
||||||
# Convert data
|
# Convert categories & positions types
|
||||||
|
|
||||||
category_map = {
|
category_map = {
|
||||||
'Other' : 'X',
|
'Other' : 'X',
|
||||||
|
@ -123,33 +119,94 @@ category_map = {
|
||||||
'Vowel' : 'V',
|
'Vowel' : 'V',
|
||||||
'Vowel_Dependent' : 'M',
|
'Vowel_Dependent' : 'M',
|
||||||
'Vowel_Independent' : 'V',
|
'Vowel_Independent' : 'V',
|
||||||
|
'Dotted_Circle' : 'DOTTEDCIRCLE', # Ours, not Unicode's
|
||||||
|
}
|
||||||
|
|
||||||
|
category_overrides = {
|
||||||
|
|
||||||
|
# The following act more like the Bindus.
|
||||||
|
0x0953: 'SM',
|
||||||
|
0x0954: 'SM',
|
||||||
|
|
||||||
|
# The following act like consonants.
|
||||||
|
0x0A72: 'C',
|
||||||
|
0x0A73: 'C',
|
||||||
|
0x1CF5: 'C',
|
||||||
|
0x1CF6: 'C',
|
||||||
|
|
||||||
|
# TODO: The following should only be allowed after a Visarga.
|
||||||
|
# For now, just treat them like regular tone marks.
|
||||||
|
0x1CE2: 'A',
|
||||||
|
0x1CE3: 'A',
|
||||||
|
0x1CE4: 'A',
|
||||||
|
0x1CE5: 'A',
|
||||||
|
0x1CE6: 'A',
|
||||||
|
0x1CE7: 'A',
|
||||||
|
0x1CE8: 'A',
|
||||||
|
|
||||||
|
# TODO: The following should only be allowed after some of
|
||||||
|
# the nasalization marks, maybe only for U+1CE9..U+1CF1.
|
||||||
|
# For now, just treat them like tone marks.
|
||||||
|
0x1CED: 'A',
|
||||||
|
|
||||||
|
# The following take marks in standalone clusters, similar to Avagraha.
|
||||||
|
0xA8F2: 'Symbol',
|
||||||
|
0xA8F3: 'Symbol',
|
||||||
|
0xA8F4: 'Symbol',
|
||||||
|
0xA8F5: 'Symbol',
|
||||||
|
0xA8F6: 'Symbol',
|
||||||
|
0xA8F7: 'Symbol',
|
||||||
|
0x1CE9: 'Symbol',
|
||||||
|
0x1CEA: 'Symbol',
|
||||||
|
0x1CEB: 'Symbol',
|
||||||
|
0x1CEC: 'Symbol',
|
||||||
|
0x1CEE: 'Symbol',
|
||||||
|
0x1CEF: 'Symbol',
|
||||||
|
0x1CF0: 'Symbol',
|
||||||
|
0x1CF1: 'Symbol',
|
||||||
|
|
||||||
|
0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524
|
||||||
|
|
||||||
|
# According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
|
||||||
|
# so the Indic shaper needs to know their categories.
|
||||||
|
0x11301: 'SM',
|
||||||
|
0x11302: 'SM',
|
||||||
|
0x11303: 'SM',
|
||||||
|
0x1133B: 'N',
|
||||||
|
0x1133C: 'N',
|
||||||
|
|
||||||
|
0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
|
||||||
|
0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849
|
||||||
|
|
||||||
|
0x0980: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/538
|
||||||
|
0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
|
||||||
|
0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
|
||||||
|
0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511
|
||||||
|
|
||||||
|
0x2010: 'PLACEHOLDER',
|
||||||
|
0x2011: 'PLACEHOLDER',
|
||||||
|
|
||||||
|
0x25CC: 'DOTTEDCIRCLE',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
new_values0 = {}
|
|
||||||
for k,v in values[0].items():
|
|
||||||
new_values0[category_map[k]] = new_values0.get(category_map[k], 0) + v
|
|
||||||
values[0] = new_values0
|
|
||||||
defaults = (category_map[defaults[0]], defaults[1], defaults[2])
|
defaults = (category_map[defaults[0]], defaults[1], defaults[2])
|
||||||
|
|
||||||
new_data = {}
|
new_data = {}
|
||||||
for key, (cat, pos, block) in data.items():
|
for key, (cat, pos, block) in data.items():
|
||||||
|
|
||||||
cat = category_map[cat]
|
cat = category_map[cat]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
new_data[key] = (cat, pos, block)
|
new_data[key] = (cat, pos, block)
|
||||||
data = new_data
|
data = new_data
|
||||||
|
|
||||||
|
# Apply the hand-maintained per-codepoint category overrides on top of the
# converted data.  Only the category is replaced; the position and block of
# each entry are kept as they were.  Codepoints that have no entry in `data`
# start from `defaults`.
for k, new_cat in category_overrides.items():
    # This must be an unpacking *assignment*.  Writing `in` here instead of
    # `=` makes it a membership test whose result is discarded, which leaves
    # `pos` and `block` holding the values from the last iteration of the
    # preceding conversion loop, so every overridden codepoint would get the
    # same (wrong) position and block.
    (_unused_cat, pos, block) = data.get(k, defaults)
    data[k] = (new_cat, pos, block)
|
||||||
|
|
||||||
|
values = [{_: 1} for _ in defaults]
|
||||||
|
for vv in data.values():
|
||||||
|
for i,v in enumerate(vv):
|
||||||
|
values[i][v] = values[i].get (v, 0) + 1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -184,6 +241,7 @@ print ()
|
||||||
short = [{
|
short = [{
|
||||||
"Coeng": 'Co',
|
"Coeng": 'Co',
|
||||||
"PLACEHOLDER": 'GB',
|
"PLACEHOLDER": 'GB',
|
||||||
|
"DOTTEDCIRCLE": 'DC',
|
||||||
},{
|
},{
|
||||||
"Not_Applicable": 'x',
|
"Not_Applicable": 'x',
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -23,40 +23,37 @@
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wunused-macros"
|
#pragma GCC diagnostic ignored "-Wunused-macros"
|
||||||
|
|
||||||
#define ISC_A OT_A /* 59 chars; A */
|
#define ISC_A OT_A /* 51 chars; A */
|
||||||
#define ISC_C OT_C /* 2226 chars; C */
|
#define ISC_C OT_C /* 532 chars; C */
|
||||||
#define ISC_CM OT_CM /* 196 chars; CM */
|
#define ISC_CM OT_CM /* 10 chars; CM */
|
||||||
#define ISC_CS OT_CS /* 8 chars; CS */
|
#define ISC_CS OT_CS /* 2 chars; CS */
|
||||||
#define ISC_Co OT_Coeng /* 12 chars; Coeng */
|
#define ISC_Co OT_Coeng /* 2 chars; Coeng */
|
||||||
#define ISC_H OT_H /* 27 chars; H */
|
#define ISC_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */
|
||||||
#define ISC_M OT_M /* 713 chars; M */
|
#define ISC_H OT_H /* 10 chars; H */
|
||||||
#define ISC_N OT_N /* 74 chars; N */
|
#define ISC_M OT_M /* 209 chars; M */
|
||||||
#define ISC_GB OT_PLACEHOLDER /* 534 chars; PLACEHOLDER */
|
#define ISC_N OT_N /* 35 chars; N */
|
||||||
#define ISC_RS OT_RS /* 2 chars; RS */
|
#define ISC_GB OT_PLACEHOLDER /* 168 chars; PLACEHOLDER */
|
||||||
#define ISC_R OT_Repha /* 3 chars; Repha */
|
#define ISC_RS OT_RS /* 2 chars; RS */
|
||||||
#define ISC_SM OT_SM /* 154 chars; SM */
|
#define ISC_R OT_Repha /* 1 chars; Repha */
|
||||||
#define ISC_S OT_Symbol /* 17 chars; Symbol */
|
#define ISC_SM OT_SM /* 56 chars; SM */
|
||||||
#define ISC_V OT_V /* 516 chars; V */
|
#define ISC_S OT_Symbol /* 22 chars; Symbol */
|
||||||
#define ISC_X OT_X /* 19 chars; X */
|
#define ISC_V OT_V /* 190 chars; V */
|
||||||
#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */
|
#define ISC_X OT_X /* 2 chars; X */
|
||||||
#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */
|
#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */
|
||||||
|
#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */
|
||||||
|
|
||||||
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 352 chars; Bottom */
|
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 77 chars; Bottom */
|
||||||
#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */
|
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 21 chars; Left */
|
||||||
#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */
|
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 14 chars; Left_And_Right */
|
||||||
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */
|
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 904 chars; Not_Applicable */
|
||||||
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */
|
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 44 chars; Overstruck */
|
||||||
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */
|
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 98 chars; Right */
|
||||||
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */
|
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 122 chars; Top */
|
||||||
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 290 chars; Right */
|
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 1 chars; Top_And_Bottom */
|
||||||
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 418 chars; Top */
|
#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 1 chars; Top_And_Bottom_And_Left */
|
||||||
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */
|
#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 3 chars; Top_And_Left */
|
||||||
#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */
|
#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 3 chars; Top_And_Left_And_Right */
|
||||||
#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */
|
#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 7 chars; Top_And_Right */
|
||||||
#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */
|
|
||||||
#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */
|
|
||||||
#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */
|
|
||||||
#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */
|
|
||||||
|
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
@ -101,16 +98,19 @@ static const uint16_t indic_table[] = {
|
||||||
/* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(S,x), _(M,R), _(M,L),
|
/* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(S,x), _(M,R), _(M,L),
|
||||||
/* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
|
/* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
|
||||||
/* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(H,B), _(M,L), _(M,R),
|
/* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(H,B), _(M,L), _(M,R),
|
||||||
/* 0950 */ _(X,x), _(A,T), _(A,B), _(X,T), _(X,T), _(M,T), _(M,B), _(M,B),
|
/* 0950 */ _(X,x), _(A,T), _(A,B), _(SM,O), _(SM,O), _(M,T), _(M,B), _(M,B),
|
||||||
/* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
/* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||||
/* 0960 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
/* 0960 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||||
/* 0968 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
/* 0968 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||||
/* 0970 */ _(X,x), _(X,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x),
|
/* 0970 */ _(X,x), _(X,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x),
|
||||||
/* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
/* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||||
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 0980 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||||
|
|
||||||
/* Bengali */
|
/* Bengali */
|
||||||
|
|
||||||
/* 0980 */ _(GB,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
|
||||||
/* 0988 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(X,x), _(V,x),
|
/* 0988 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(X,x), _(V,x),
|
||||||
/* 0990 */ _(V,x), _(X,x), _(X,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
/* 0990 */ _(V,x), _(X,x), _(X,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||||
/* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
/* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||||
|
@ -125,7 +125,10 @@ static const uint16_t indic_table[] = {
|
||||||
/* 09E0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
/* 09E0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||||
/* 09E8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
/* 09E8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||||
/* 09F0 */ _(C,x), _(C,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 09F0 */ _(C,x), _(C,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
/* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(SM,x), _(X,x), _(SM,T), _(X,x),
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(GB,O), _(X,x), _(SM,T), _(X,x),
|
||||||
|
|
||||||
/* Gurmukhi */
|
/* Gurmukhi */
|
||||||
|
|
||||||
|
@ -139,11 +142,17 @@ static const uint16_t indic_table[] = {
|
||||||
/* 0A38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(X,x), _(M,R), _(M,L),
|
/* 0A38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(X,x), _(M,R), _(M,L),
|
||||||
/* 0A40 */ _(M,R), _(M,B), _(M,B), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T),
|
/* 0A40 */ _(M,R), _(M,B), _(M,B), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T),
|
||||||
/* 0A48 */ _(M,T), _(X,x), _(X,x), _(M,T), _(M,T), _(H,B), _(X,x), _(X,x),
|
/* 0A48 */ _(M,T), _(X,x), _(X,x), _(M,T), _(M,T), _(H,B), _(X,x), _(X,x),
|
||||||
/* 0A50 */ _(X,x), _(A,B), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 0A50 */ _(X,x), _(M,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
|
/* Gurmukhi */
|
||||||
|
|
||||||
/* 0A58 */ _(X,x), _(C,x), _(C,x), _(C,x), _(C,x), _(X,x), _(C,x), _(X,x),
|
/* 0A58 */ _(X,x), _(C,x), _(C,x), _(C,x), _(C,x), _(X,x), _(C,x), _(X,x),
|
||||||
/* 0A60 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
/* 0A60 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||||
/* 0A68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
/* 0A68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||||
/* 0A70 */ _(SM,T), _(SM,T), _(GB,x), _(GB,x), _(X,x), _(CM,B), _(X,x), _(X,x),
|
/* 0A70 */ _(SM,T), _(SM,T), _(C,O), _(C,O), _(X,x), _(CM,B), _(X,x), _(X,x),
|
||||||
/* 0A78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 0A78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
/* Gujarati */
|
/* Gujarati */
|
||||||
|
@ -163,7 +172,7 @@ static const uint16_t indic_table[] = {
|
||||||
/* 0AE0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
/* 0AE0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||||
/* 0AE8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
/* 0AE8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||||
/* 0AF0 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 0AF0 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
/* 0AF8 */ _(X,x), _(C,x), _(A,T), _(A,T), _(A,T), _(N,T), _(N,T), _(N,T),
|
/* 0AF8 */ _(X,x), _(C,x), _(A,T), _(N,O), _(A,T), _(N,T), _(N,T), _(N,T),
|
||||||
|
|
||||||
/* Oriya */
|
/* Oriya */
|
||||||
|
|
||||||
|
@ -177,7 +186,13 @@ static const uint16_t indic_table[] = {
|
||||||
/* 0B38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(S,x), _(M,R), _(M,T),
|
/* 0B38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(S,x), _(M,R), _(M,T),
|
||||||
/* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(X,x), _(X,x), _(M,L),
|
/* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(X,x), _(X,x), _(M,L),
|
||||||
/* 0B48 */ _(M,TL), _(X,x), _(X,x), _(M,LR),_(M,TLR), _(H,B), _(X,x), _(X,x),
|
/* 0B48 */ _(M,TL), _(X,x), _(X,x), _(M,LR),_(M,TLR), _(H,B), _(X,x), _(X,x),
|
||||||
/* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T), _(M,T), _(M,TR),
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(N,O), _(M,T), _(M,TR),
|
||||||
|
|
||||||
|
/* Oriya */
|
||||||
|
|
||||||
/* 0B58 */ _(X,x), _(X,x), _(X,x), _(X,x), _(C,x), _(C,x), _(X,x), _(C,x),
|
/* 0B58 */ _(X,x), _(X,x), _(X,x), _(X,x), _(C,x), _(C,x), _(X,x), _(C,x),
|
||||||
/* 0B60 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
/* 0B60 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||||
/* 0B68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
/* 0B68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||||
|
@ -222,9 +237,12 @@ static const uint16_t indic_table[] = {
|
||||||
/* 0C70 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 0C70 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
/* 0C78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 0C78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 0C80 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||||
|
|
||||||
/* Kannada */
|
/* Kannada */
|
||||||
|
|
||||||
/* 0C80 */ _(SM,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
|
||||||
/* 0C88 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
/* 0C88 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
||||||
/* 0C90 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
/* 0C90 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||||
/* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
/* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||||
|
@ -243,7 +261,7 @@ static const uint16_t indic_table[] = {
|
||||||
|
|
||||||
/* Malayalam */
|
/* Malayalam */
|
||||||
|
|
||||||
/* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(SM,x), _(V,x), _(V,x), _(V,x),
|
/* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(GB,O), _(V,x), _(V,x), _(V,x),
|
||||||
/* 0D08 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
/* 0D08 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
||||||
/* 0D10 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
/* 0D10 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||||
/* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
/* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||||
|
@ -331,9 +349,9 @@ static const uint16_t indic_table[] = {
|
||||||
|
|
||||||
/* 1CD0 */ _(A,T), _(A,T), _(A,T), _(X,x), _(A,O), _(A,B), _(A,B), _(A,B),
|
/* 1CD0 */ _(A,T), _(A,T), _(A,T), _(X,x), _(A,O), _(A,B), _(A,B), _(A,B),
|
||||||
/* 1CD8 */ _(A,B), _(A,B), _(A,T), _(A,T), _(A,B), _(A,B), _(A,B), _(A,B),
|
/* 1CD8 */ _(A,B), _(A,B), _(A,T), _(A,T), _(A,B), _(A,B), _(A,B), _(A,B),
|
||||||
/* 1CE0 */ _(A,T), _(A,R), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O),
|
/* 1CE0 */ _(A,T), _(A,R), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O),
|
||||||
/* 1CE8 */ _(X,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,B), _(X,x), _(X,x),
|
/* 1CE8 */ _(A,O), _(S,O), _(S,O), _(S,O), _(S,O), _(A,O), _(S,O), _(S,O),
|
||||||
/* 1CF0 */ _(X,x), _(X,x), _(C,x), _(C,x), _(A,T), _(CS,x), _(CS,x), _(A,R),
|
/* 1CF0 */ _(S,O), _(S,O), _(C,x), _(C,x), _(A,T), _(C,O), _(C,O), _(A,R),
|
||||||
/* 1CF8 */ _(A,x), _(A,x), _(GB,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* 1CF8 */ _(A,x), _(A,x), _(GB,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
#define indic_offset_0x2008u 1656
|
#define indic_offset_0x2008u 1656
|
||||||
|
@ -342,7 +360,10 @@ static const uint16_t indic_table[] = {
|
||||||
/* General Punctuation */
|
/* General Punctuation */
|
||||||
|
|
||||||
/* 2008 */ _(X,x), _(X,x), _(X,x), _(X,x),_(ZWNJ,x),_(ZWJ,x), _(X,x), _(X,x),
|
/* 2008 */ _(X,x), _(X,x), _(X,x), _(X,x),_(ZWNJ,x),_(ZWJ,x), _(X,x), _(X,x),
|
||||||
/* 2010 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x),
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 2010 */ _(GB,O), _(GB,O), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
#define indic_offset_0x2070u 1672
|
#define indic_offset_0x2070u 1672
|
||||||
|
|
||||||
|
@ -360,7 +381,7 @@ static const uint16_t indic_table[] = {
|
||||||
|
|
||||||
/* A8E0 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
/* A8E0 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
||||||
/* A8E8 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
/* A8E8 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
||||||
/* A8F0 */ _(A,T), _(A,T), _(SM,x), _(SM,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
/* A8F0 */ _(A,T), _(A,T), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O),
|
||||||
/* A8F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(V,x), _(M,T),
|
/* A8F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(V,x), _(M,T),
|
||||||
|
|
||||||
#define indic_offset_0xa9e0u 1728
|
#define indic_offset_0xa9e0u 1728
|
||||||
|
@ -383,7 +404,21 @@ static const uint16_t indic_table[] = {
|
||||||
/* AA70 */ _(X,x), _(C,x), _(C,x), _(C,x), _(GB,x), _(GB,x), _(GB,x), _(X,x),
|
/* AA70 */ _(X,x), _(C,x), _(C,x), _(C,x), _(GB,x), _(GB,x), _(GB,x), _(X,x),
|
||||||
/* AA78 */ _(X,x), _(X,x), _(C,x), _(N,R), _(N,T), _(N,R), _(C,x), _(C,x),
|
/* AA78 */ _(X,x), _(X,x), _(C,x), _(N,R), _(N,T), _(N,R), _(C,x), _(C,x),
|
||||||
|
|
||||||
}; /* Table items: 1792; occupancy: 71% */
|
#define indic_offset_0x11300u 1792
|
||||||
|
|
||||||
|
|
||||||
|
/* Vedic Extensions */
|
||||||
|
|
||||||
|
/* 11300 */ _(X,x), _(SM,O), _(SM,O), _(SM,O), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11308 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11310 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11318 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11320 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11328 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11330 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||||
|
/* 11338 */ _(X,x), _(X,x), _(X,x), _(N,O), _(N,O), _(X,x), _(X,x), _(X,x),
|
||||||
|
|
||||||
|
}; /* Table items: 1856; occupancy: 69% */
|
||||||
|
|
||||||
uint16_t
|
uint16_t
|
||||||
hb_indic_get_categories (hb_codepoint_t u)
|
hb_indic_get_categories (hb_codepoint_t u)
|
||||||
|
@ -404,7 +439,7 @@ hb_indic_get_categories (hb_codepoint_t u)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x2u:
|
case 0x2u:
|
||||||
if (unlikely (u == 0x25CCu)) return _(GB,x);
|
if (unlikely (u == 0x25CCu)) return _(DC,O);
|
||||||
if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
|
if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
|
||||||
if (hb_in_range<hb_codepoint_t> (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
|
if (hb_in_range<hb_codepoint_t> (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
|
||||||
break;
|
break;
|
||||||
|
@ -415,6 +450,10 @@ hb_indic_get_categories (hb_codepoint_t u)
|
||||||
if (hb_in_range<hb_codepoint_t> (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
|
if (hb_in_range<hb_codepoint_t> (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 0x11u:
|
||||||
|
if (hb_in_range<hb_codepoint_t> (u, 0x11300u, 0x1133Fu)) return indic_table[u - 0x11300u + indic_offset_0x11300u];
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -428,6 +467,7 @@ hb_indic_get_categories (hb_codepoint_t u)
|
||||||
#undef ISC_CM
|
#undef ISC_CM
|
||||||
#undef ISC_CS
|
#undef ISC_CS
|
||||||
#undef ISC_Co
|
#undef ISC_Co
|
||||||
|
#undef ISC_DC
|
||||||
#undef ISC_H
|
#undef ISC_H
|
||||||
#undef ISC_M
|
#undef ISC_M
|
||||||
#undef ISC_N
|
#undef ISC_N
|
||||||
|
@ -442,8 +482,6 @@ hb_indic_get_categories (hb_codepoint_t u)
|
||||||
#undef ISC_ZWNJ
|
#undef ISC_ZWNJ
|
||||||
|
|
||||||
#undef IMC_B
|
#undef IMC_B
|
||||||
#undef IMC_BL
|
|
||||||
#undef IMC_BR
|
|
||||||
#undef IMC_L
|
#undef IMC_L
|
||||||
#undef IMC_LR
|
#undef IMC_LR
|
||||||
#undef IMC_x
|
#undef IMC_x
|
||||||
|
@ -452,11 +490,9 @@ hb_indic_get_categories (hb_codepoint_t u)
|
||||||
#undef IMC_T
|
#undef IMC_T
|
||||||
#undef IMC_TB
|
#undef IMC_TB
|
||||||
#undef IMC_TBL
|
#undef IMC_TBL
|
||||||
#undef IMC_TBR
|
|
||||||
#undef IMC_TL
|
#undef IMC_TL
|
||||||
#undef IMC_TLR
|
#undef IMC_TLR
|
||||||
#undef IMC_TR
|
#undef IMC_TR
|
||||||
#undef IMC_VOL
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -265,63 +265,16 @@ set_indic_properties (hb_glyph_info_t &info)
|
||||||
indic_category_t cat = (indic_category_t) (type & 0xFFu);
|
indic_category_t cat = (indic_category_t) (type & 0xFFu);
|
||||||
indic_position_t pos = (indic_position_t) (type >> 8);
|
indic_position_t pos = (indic_position_t) (type >> 8);
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Re-assign category
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* The following act more like the Bindus. */
|
|
||||||
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u)))
|
|
||||||
cat = OT_SM;
|
|
||||||
/* The following act like consonants. */
|
|
||||||
else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u,
|
|
||||||
0x1CF5u, 0x1CF6u)))
|
|
||||||
cat = OT_C;
|
|
||||||
/* TODO: The following should only be allowed after a Visarga.
|
|
||||||
* For now, just treat them like regular tone marks. */
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u)))
|
|
||||||
cat = OT_A;
|
|
||||||
/* TODO: The following should only be allowed after some of
|
|
||||||
* the nasalization marks, maybe only for U+1CE9..U+1CF1.
|
|
||||||
* For now, just treat them like tone marks. */
|
|
||||||
else if (unlikely (u == 0x1CEDu))
|
|
||||||
cat = OT_A;
|
|
||||||
/* The following take marks in standalone clusters, similar to Avagraha. */
|
|
||||||
else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u,
|
|
||||||
0x1CE9u, 0x1CECu,
|
|
||||||
0x1CEEu, 0x1CF1u)))
|
|
||||||
{
|
|
||||||
cat = OT_Symbol;
|
|
||||||
//static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), "");
|
|
||||||
}
|
|
||||||
else if (unlikely (u == 0x0A51u))
|
|
||||||
{
|
|
||||||
/* https://github.com/harfbuzz/harfbuzz/issues/524 */
|
|
||||||
cat = OT_M;
|
|
||||||
pos = POS_BELOW_C;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
|
|
||||||
* so the Indic shaper needs to know their categories. */
|
|
||||||
else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM;
|
|
||||||
else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N;
|
|
||||||
|
|
||||||
else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */
|
|
||||||
else if (unlikely (u == 0x0B55u)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/2849 */
|
|
||||||
|
|
||||||
else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */
|
|
||||||
else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */
|
|
||||||
else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */
|
|
||||||
else if (unlikely (u == 0x0D04u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/3511 */
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u)))
|
|
||||||
cat = OT_PLACEHOLDER;
|
|
||||||
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Re-assign position.
|
* Re-assign position.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
if (unlikely (u == 0x0A51u))
|
||||||
|
{
|
||||||
|
/* https://github.com/harfbuzz/harfbuzz/issues/524 */
|
||||||
|
pos = POS_BELOW_C;
|
||||||
|
}
|
||||||
|
|
||||||
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
||||||
{
|
{
|
||||||
pos = POS_BASE_C;
|
pos = POS_BASE_C;
|
||||||
|
@ -340,7 +293,6 @@ set_indic_properties (hb_glyph_info_t &info)
|
||||||
if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
|
if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
info.indic_category() = cat;
|
info.indic_category() = cat;
|
||||||
info.indic_position() = pos;
|
info.indic_position() = pos;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue