[indic-generator] Move category overrides to generator
This commit is contained in:
parent
58eeb3a180
commit
4907514026
|
@ -42,7 +42,6 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]]
|
|||
headers = [[f.readline () for i in range (2)] for f in files]
|
||||
|
||||
data = [{} for _ in files]
|
||||
values = [{} for _ in files]
|
||||
for i, f in enumerate (files):
|
||||
for line in f:
|
||||
|
||||
|
@ -65,12 +64,9 @@ for i, f in enumerate (files):
|
|||
|
||||
for u in range (start, end + 1):
|
||||
data[i][u] = t
|
||||
values[i][t] = values[i].get (t, 0) + end - start + 1
|
||||
|
||||
# Merge data into one dict:
|
||||
defaults = ('Other', 'Not_Applicable', 'No_Block')
|
||||
for i,v in enumerate (defaults):
|
||||
values[i][v] = values[i].get (v, 0) + 1
|
||||
combined = {}
|
||||
for i,d in enumerate (data):
|
||||
for u,v in d.items ():
|
||||
|
@ -84,7 +80,7 @@ data = combined
|
|||
del combined
|
||||
|
||||
|
||||
# Convert data
|
||||
# Convert categories & positions types
|
||||
|
||||
category_map = {
|
||||
'Other' : 'X',
|
||||
|
@ -123,33 +119,94 @@ category_map = {
|
|||
'Vowel' : 'V',
|
||||
'Vowel_Dependent' : 'M',
|
||||
'Vowel_Independent' : 'V',
|
||||
'Dotted_Circle' : 'DOTTEDCIRCLE', # Ours, not Unicode's
|
||||
}
|
||||
|
||||
category_overrides = {
|
||||
|
||||
# The following act more like the Bindus.
|
||||
0x0953: 'SM',
|
||||
0x0954: 'SM',
|
||||
|
||||
# The following act like consonants.
|
||||
0x0A72: 'C',
|
||||
0x0A73: 'C',
|
||||
0x1CF5: 'C',
|
||||
0x1CF6: 'C',
|
||||
|
||||
# TODO: The following should only be allowed after a Visarga.
|
||||
# For now, just treat them like regular tone marks.
|
||||
0x1CE2: 'A',
|
||||
0x1CE3: 'A',
|
||||
0x1CE4: 'A',
|
||||
0x1CE5: 'A',
|
||||
0x1CE6: 'A',
|
||||
0x1CE7: 'A',
|
||||
0x1CE8: 'A',
|
||||
|
||||
# TODO: The following should only be allowed after some of
|
||||
# the nasalization marks, maybe only for U+1CE9..U+1CF1.
|
||||
# For now, just treat them like tone marks.
|
||||
0x1CED: 'A',
|
||||
|
||||
# The following take marks in standalone clusters, similar to Avagraha.
|
||||
0xA8F2: 'Symbol',
|
||||
0xA8F3: 'Symbol',
|
||||
0xA8F4: 'Symbol',
|
||||
0xA8F5: 'Symbol',
|
||||
0xA8F6: 'Symbol',
|
||||
0xA8F7: 'Symbol',
|
||||
0x1CE9: 'Symbol',
|
||||
0x1CEA: 'Symbol',
|
||||
0x1CEB: 'Symbol',
|
||||
0x1CEC: 'Symbol',
|
||||
0x1CEE: 'Symbol',
|
||||
0x1CEF: 'Symbol',
|
||||
0x1CF0: 'Symbol',
|
||||
0x1CF1: 'Symbol',
|
||||
|
||||
0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524
|
||||
|
||||
# According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
|
||||
# so the Indic shaper needs to know their categories.
|
||||
0x11301: 'SM',
|
||||
0x11302: 'SM',
|
||||
0x11303: 'SM',
|
||||
0x1133B: 'N',
|
||||
0x1133C: 'N',
|
||||
|
||||
0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552
|
||||
0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849
|
||||
|
||||
0x0980: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/538
|
||||
0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613
|
||||
0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623
|
||||
0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511
|
||||
|
||||
0x2010: 'PLACEHOLDER',
|
||||
0x2011: 'PLACEHOLDER',
|
||||
|
||||
0x25CC: 'DOTTEDCIRCLE',
|
||||
}
|
||||
|
||||
|
||||
new_values0 = {}
|
||||
for k,v in values[0].items():
|
||||
new_values0[category_map[k]] = new_values0.get(category_map[k], 0) + v
|
||||
values[0] = new_values0
|
||||
defaults = (category_map[defaults[0]], defaults[1], defaults[2])
|
||||
|
||||
new_data = {}
|
||||
for key, (cat, pos, block) in data.items():
|
||||
|
||||
cat = category_map[cat]
|
||||
|
||||
|
||||
|
||||
new_data[key] = (cat, pos, block)
|
||||
data = new_data
|
||||
|
||||
# Apply the manual category overrides on top of the converted data.
# Only the category is replaced; the position and block already recorded for
# the code point are kept, falling back to `defaults` when the code point has
# no entry in `data`.
for k, new_cat in category_overrides.items():
    # This must be an unpacking assignment. A membership test
    # (`(cat, pos, block) in ...`) is a no-op expression that leaves `pos`
    # and `block` holding whatever the preceding conversion loop last
    # assigned, stamping that stale position/block onto every override.
    (cat, pos, block) = data.get (k, defaults)
    data[k] = (new_cat, pos, block)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
values = [{_: 1} for _ in defaults]
|
||||
for vv in data.values():
|
||||
for i,v in enumerate(vv):
|
||||
values[i][v] = values[i].get (v, 0) + 1
|
||||
|
||||
|
||||
|
||||
|
@ -184,6 +241,7 @@ print ()
|
|||
short = [{
|
||||
"Coeng": 'Co',
|
||||
"PLACEHOLDER": 'GB',
|
||||
"DOTTEDCIRCLE": 'DC',
|
||||
},{
|
||||
"Not_Applicable": 'x',
|
||||
}]
|
||||
|
|
|
@ -23,40 +23,37 @@
|
|||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-macros"
|
||||
|
||||
#define ISC_A OT_A /* 59 chars; A */
|
||||
#define ISC_C OT_C /* 2226 chars; C */
|
||||
#define ISC_CM OT_CM /* 196 chars; CM */
|
||||
#define ISC_CS OT_CS /* 8 chars; CS */
|
||||
#define ISC_Co OT_Coeng /* 12 chars; Coeng */
|
||||
#define ISC_H OT_H /* 27 chars; H */
|
||||
#define ISC_M OT_M /* 713 chars; M */
|
||||
#define ISC_N OT_N /* 74 chars; N */
|
||||
#define ISC_GB OT_PLACEHOLDER /* 534 chars; PLACEHOLDER */
|
||||
#define ISC_RS OT_RS /* 2 chars; RS */
|
||||
#define ISC_R OT_Repha /* 3 chars; Repha */
|
||||
#define ISC_SM OT_SM /* 154 chars; SM */
|
||||
#define ISC_S OT_Symbol /* 17 chars; Symbol */
|
||||
#define ISC_V OT_V /* 516 chars; V */
|
||||
#define ISC_X OT_X /* 19 chars; X */
|
||||
#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */
|
||||
#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */
|
||||
#define ISC_A OT_A /* 51 chars; A */
|
||||
#define ISC_C OT_C /* 532 chars; C */
|
||||
#define ISC_CM OT_CM /* 10 chars; CM */
|
||||
#define ISC_CS OT_CS /* 2 chars; CS */
|
||||
#define ISC_Co OT_Coeng /* 2 chars; Coeng */
|
||||
#define ISC_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */
|
||||
#define ISC_H OT_H /* 10 chars; H */
|
||||
#define ISC_M OT_M /* 209 chars; M */
|
||||
#define ISC_N OT_N /* 35 chars; N */
|
||||
#define ISC_GB OT_PLACEHOLDER /* 168 chars; PLACEHOLDER */
|
||||
#define ISC_RS OT_RS /* 2 chars; RS */
|
||||
#define ISC_R OT_Repha /* 1 chars; Repha */
|
||||
#define ISC_SM OT_SM /* 56 chars; SM */
|
||||
#define ISC_S OT_Symbol /* 22 chars; Symbol */
|
||||
#define ISC_V OT_V /* 190 chars; V */
|
||||
#define ISC_X OT_X /* 2 chars; X */
|
||||
#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */
|
||||
#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */
|
||||
|
||||
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 352 chars; Bottom */
|
||||
#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */
|
||||
#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */
|
||||
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */
|
||||
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */
|
||||
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */
|
||||
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */
|
||||
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 290 chars; Right */
|
||||
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 418 chars; Top */
|
||||
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */
|
||||
#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */
|
||||
#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */
|
||||
#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */
|
||||
#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */
|
||||
#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */
|
||||
#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */
|
||||
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 77 chars; Bottom */
|
||||
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 21 chars; Left */
|
||||
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 14 chars; Left_And_Right */
|
||||
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 904 chars; Not_Applicable */
|
||||
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 44 chars; Overstruck */
|
||||
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 98 chars; Right */
|
||||
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 122 chars; Top */
|
||||
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 1 chars; Top_And_Bottom */
|
||||
#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 1 chars; Top_And_Bottom_And_Left */
|
||||
#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 3 chars; Top_And_Left */
|
||||
#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 3 chars; Top_And_Left_And_Right */
|
||||
#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 7 chars; Top_And_Right */
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
|
@ -101,16 +98,19 @@ static const uint16_t indic_table[] = {
|
|||
/* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(S,x), _(M,R), _(M,L),
|
||||
/* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
|
||||
/* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(H,B), _(M,L), _(M,R),
|
||||
/* 0950 */ _(X,x), _(A,T), _(A,B), _(X,T), _(X,T), _(M,T), _(M,B), _(M,B),
|
||||
/* 0950 */ _(X,x), _(A,T), _(A,B), _(SM,O), _(SM,O), _(M,T), _(M,B), _(M,B),
|
||||
/* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0960 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||
/* 0968 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||
/* 0970 */ _(X,x), _(X,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x),
|
||||
/* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 0980 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||
|
||||
/* Bengali */
|
||||
|
||||
/* 0980 */ _(GB,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||
/* 0988 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(X,x), _(V,x),
|
||||
/* 0990 */ _(V,x), _(X,x), _(X,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
|
@ -125,7 +125,10 @@ static const uint16_t indic_table[] = {
|
|||
/* 09E0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||
/* 09E8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||
/* 09F0 */ _(C,x), _(C,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(SM,x), _(X,x), _(SM,T), _(X,x),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(GB,O), _(X,x), _(SM,T), _(X,x),
|
||||
|
||||
/* Gurmukhi */
|
||||
|
||||
|
@ -139,11 +142,17 @@ static const uint16_t indic_table[] = {
|
|||
/* 0A38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(X,x), _(M,R), _(M,L),
|
||||
/* 0A40 */ _(M,R), _(M,B), _(M,B), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T),
|
||||
/* 0A48 */ _(M,T), _(X,x), _(X,x), _(M,T), _(M,T), _(H,B), _(X,x), _(X,x),
|
||||
/* 0A50 */ _(X,x), _(A,B), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 0A50 */ _(X,x), _(M,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
/* Gurmukhi */
|
||||
|
||||
/* 0A58 */ _(X,x), _(C,x), _(C,x), _(C,x), _(C,x), _(X,x), _(C,x), _(X,x),
|
||||
/* 0A60 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||
/* 0A68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||
/* 0A70 */ _(SM,T), _(SM,T), _(GB,x), _(GB,x), _(X,x), _(CM,B), _(X,x), _(X,x),
|
||||
/* 0A70 */ _(SM,T), _(SM,T), _(C,O), _(C,O), _(X,x), _(CM,B), _(X,x), _(X,x),
|
||||
/* 0A78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
/* Gujarati */
|
||||
|
@ -163,7 +172,7 @@ static const uint16_t indic_table[] = {
|
|||
/* 0AE0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||
/* 0AE8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||
/* 0AF0 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 0AF8 */ _(X,x), _(C,x), _(A,T), _(A,T), _(A,T), _(N,T), _(N,T), _(N,T),
|
||||
/* 0AF8 */ _(X,x), _(C,x), _(A,T), _(N,O), _(A,T), _(N,T), _(N,T), _(N,T),
|
||||
|
||||
/* Oriya */
|
||||
|
||||
|
@ -177,7 +186,13 @@ static const uint16_t indic_table[] = {
|
|||
/* 0B38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(S,x), _(M,R), _(M,T),
|
||||
/* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(X,x), _(X,x), _(M,L),
|
||||
/* 0B48 */ _(M,TL), _(X,x), _(X,x), _(M,LR),_(M,TLR), _(H,B), _(X,x), _(X,x),
|
||||
/* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T), _(M,T), _(M,TR),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(N,O), _(M,T), _(M,TR),
|
||||
|
||||
/* Oriya */
|
||||
|
||||
/* 0B58 */ _(X,x), _(X,x), _(X,x), _(X,x), _(C,x), _(C,x), _(X,x), _(C,x),
|
||||
/* 0B60 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x),
|
||||
/* 0B68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x),
|
||||
|
@ -222,9 +237,12 @@ static const uint16_t indic_table[] = {
|
|||
/* 0C70 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 0C78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 0C80 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||
|
||||
/* Kannada */
|
||||
|
||||
/* 0C80 */ _(SM,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x),
|
||||
/* 0C88 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
||||
/* 0C90 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
|
@ -243,7 +261,7 @@ static const uint16_t indic_table[] = {
|
|||
|
||||
/* Malayalam */
|
||||
|
||||
/* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(SM,x), _(V,x), _(V,x), _(V,x),
|
||||
/* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(GB,O), _(V,x), _(V,x), _(V,x),
|
||||
/* 0D08 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x),
|
||||
/* 0D10 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
|
@ -331,9 +349,9 @@ static const uint16_t indic_table[] = {
|
|||
|
||||
/* 1CD0 */ _(A,T), _(A,T), _(A,T), _(X,x), _(A,O), _(A,B), _(A,B), _(A,B),
|
||||
/* 1CD8 */ _(A,B), _(A,B), _(A,T), _(A,T), _(A,B), _(A,B), _(A,B), _(A,B),
|
||||
/* 1CE0 */ _(A,T), _(A,R), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O),
|
||||
/* 1CE8 */ _(X,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,B), _(X,x), _(X,x),
|
||||
/* 1CF0 */ _(X,x), _(X,x), _(C,x), _(C,x), _(A,T), _(CS,x), _(CS,x), _(A,R),
|
||||
/* 1CE0 */ _(A,T), _(A,R), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O),
|
||||
/* 1CE8 */ _(A,O), _(S,O), _(S,O), _(S,O), _(S,O), _(A,O), _(S,O), _(S,O),
|
||||
/* 1CF0 */ _(S,O), _(S,O), _(C,x), _(C,x), _(A,T), _(C,O), _(C,O), _(A,R),
|
||||
/* 1CF8 */ _(A,x), _(A,x), _(GB,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
#define indic_offset_0x2008u 1656
|
||||
|
@ -342,7 +360,10 @@ static const uint16_t indic_table[] = {
|
|||
/* General Punctuation */
|
||||
|
||||
/* 2008 */ _(X,x), _(X,x), _(X,x), _(X,x),_(ZWNJ,x),_(ZWJ,x), _(X,x), _(X,x),
|
||||
/* 2010 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 2010 */ _(GB,O), _(GB,O), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
#define indic_offset_0x2070u 1672
|
||||
|
||||
|
@ -360,7 +381,7 @@ static const uint16_t indic_table[] = {
|
|||
|
||||
/* A8E0 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
||||
/* A8E8 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T),
|
||||
/* A8F0 */ _(A,T), _(A,T), _(SM,x), _(SM,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* A8F0 */ _(A,T), _(A,T), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O),
|
||||
/* A8F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(V,x), _(M,T),
|
||||
|
||||
#define indic_offset_0xa9e0u 1728
|
||||
|
@ -383,7 +404,21 @@ static const uint16_t indic_table[] = {
|
|||
/* AA70 */ _(X,x), _(C,x), _(C,x), _(C,x), _(GB,x), _(GB,x), _(GB,x), _(X,x),
|
||||
/* AA78 */ _(X,x), _(X,x), _(C,x), _(N,R), _(N,T), _(N,R), _(C,x), _(C,x),
|
||||
|
||||
}; /* Table items: 1792; occupancy: 71% */
|
||||
#define indic_offset_0x11300u 1792
|
||||
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 11300 */ _(X,x), _(SM,O), _(SM,O), _(SM,O), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11308 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11310 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11318 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11320 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11328 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11330 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x),
|
||||
/* 11338 */ _(X,x), _(X,x), _(X,x), _(N,O), _(N,O), _(X,x), _(X,x), _(X,x),
|
||||
|
||||
}; /* Table items: 1856; occupancy: 69% */
|
||||
|
||||
uint16_t
|
||||
hb_indic_get_categories (hb_codepoint_t u)
|
||||
|
@ -404,7 +439,7 @@ hb_indic_get_categories (hb_codepoint_t u)
|
|||
break;
|
||||
|
||||
case 0x2u:
|
||||
if (unlikely (u == 0x25CCu)) return _(GB,x);
|
||||
if (unlikely (u == 0x25CCu)) return _(DC,O);
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
|
||||
break;
|
||||
|
@ -415,6 +450,10 @@ hb_indic_get_categories (hb_codepoint_t u)
|
|||
if (hb_in_range<hb_codepoint_t> (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
|
||||
break;
|
||||
|
||||
case 0x11u:
|
||||
if (hb_in_range<hb_codepoint_t> (u, 0x11300u, 0x1133Fu)) return indic_table[u - 0x11300u + indic_offset_0x11300u];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -428,6 +467,7 @@ hb_indic_get_categories (hb_codepoint_t u)
|
|||
#undef ISC_CM
|
||||
#undef ISC_CS
|
||||
#undef ISC_Co
|
||||
#undef ISC_DC
|
||||
#undef ISC_H
|
||||
#undef ISC_M
|
||||
#undef ISC_N
|
||||
|
@ -442,8 +482,6 @@ hb_indic_get_categories (hb_codepoint_t u)
|
|||
#undef ISC_ZWNJ
|
||||
|
||||
#undef IMC_B
|
||||
#undef IMC_BL
|
||||
#undef IMC_BR
|
||||
#undef IMC_L
|
||||
#undef IMC_LR
|
||||
#undef IMC_x
|
||||
|
@ -452,11 +490,9 @@ hb_indic_get_categories (hb_codepoint_t u)
|
|||
#undef IMC_T
|
||||
#undef IMC_TB
|
||||
#undef IMC_TBL
|
||||
#undef IMC_TBR
|
||||
#undef IMC_TL
|
||||
#undef IMC_TLR
|
||||
#undef IMC_TR
|
||||
#undef IMC_VOL
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -265,63 +265,16 @@ set_indic_properties (hb_glyph_info_t &info)
|
|||
indic_category_t cat = (indic_category_t) (type & 0xFFu);
|
||||
indic_position_t pos = (indic_position_t) (type >> 8);
|
||||
|
||||
|
||||
/*
|
||||
* Re-assign category
|
||||
*/
|
||||
|
||||
/* The following act more like the Bindus. */
|
||||
if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u)))
|
||||
cat = OT_SM;
|
||||
/* The following act like consonants. */
|
||||
else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u,
|
||||
0x1CF5u, 0x1CF6u)))
|
||||
cat = OT_C;
|
||||
/* TODO: The following should only be allowed after a Visarga.
|
||||
* For now, just treat them like regular tone marks. */
|
||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u)))
|
||||
cat = OT_A;
|
||||
/* TODO: The following should only be allowed after some of
|
||||
* the nasalization marks, maybe only for U+1CE9..U+1CF1.
|
||||
* For now, just treat them like tone marks. */
|
||||
else if (unlikely (u == 0x1CEDu))
|
||||
cat = OT_A;
|
||||
/* The following take marks in standalone clusters, similar to Avagraha. */
|
||||
else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u,
|
||||
0x1CE9u, 0x1CECu,
|
||||
0x1CEEu, 0x1CF1u)))
|
||||
{
|
||||
cat = OT_Symbol;
|
||||
//static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), "");
|
||||
}
|
||||
else if (unlikely (u == 0x0A51u))
|
||||
{
|
||||
/* https://github.com/harfbuzz/harfbuzz/issues/524 */
|
||||
cat = OT_M;
|
||||
pos = POS_BELOW_C;
|
||||
}
|
||||
|
||||
/* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil,
|
||||
* so the Indic shaper needs to know their categories. */
|
||||
else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM;
|
||||
else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N;
|
||||
|
||||
else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */
|
||||
else if (unlikely (u == 0x0B55u)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/2849 */
|
||||
|
||||
else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */
|
||||
else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */
|
||||
else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */
|
||||
else if (unlikely (u == 0x0D04u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/3511 */
|
||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u)))
|
||||
cat = OT_PLACEHOLDER;
|
||||
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
|
||||
|
||||
|
||||
/*
|
||||
* Re-assign position.
|
||||
*/
|
||||
|
||||
if (unlikely (u == 0x0A51u))
|
||||
{
|
||||
/* https://github.com/harfbuzz/harfbuzz/issues/524 */
|
||||
pos = POS_BELOW_C;
|
||||
}
|
||||
|
||||
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
||||
{
|
||||
pos = POS_BASE_C;
|
||||
|
@ -340,7 +293,6 @@ set_indic_properties (hb_glyph_info_t &info)
|
|||
if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */
|
||||
|
||||
|
||||
|
||||
info.indic_category() = cat;
|
||||
info.indic_position() = pos;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue