From 490751402686e86832019df0dfb0905b1a0b42d5 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Thu, 9 Jun 2022 06:33:51 -0600 Subject: [PATCH] [indic-generator] Move category overrides to generator --- src/gen-indic-table.py | 96 ++++++++++++++++----- src/hb-ot-shaper-indic-table.cc | 142 ++++++++++++++++++++------------ src/hb-ot-shaper-indic.hh | 60 ++------------ 3 files changed, 172 insertions(+), 126 deletions(-) diff --git a/src/gen-indic-table.py b/src/gen-indic-table.py index dc42e152b..bcf52eb75 100755 --- a/src/gen-indic-table.py +++ b/src/gen-indic-table.py @@ -42,7 +42,6 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]] headers = [[f.readline () for i in range (2)] for f in files] data = [{} for _ in files] -values = [{} for _ in files] for i, f in enumerate (files): for line in f: @@ -65,12 +64,9 @@ for i, f in enumerate (files): for u in range (start, end + 1): data[i][u] = t - values[i][t] = values[i].get (t, 0) + end - start + 1 # Merge data into one dict: defaults = ('Other', 'Not_Applicable', 'No_Block') -for i,v in enumerate (defaults): - values[i][v] = values[i].get (v, 0) + 1 combined = {} for i,d in enumerate (data): for u,v in d.items (): @@ -84,7 +80,7 @@ data = combined del combined -# Convert data +# Convert categories & positions types category_map = { 'Other' : 'X', @@ -123,33 +119,94 @@ category_map = { 'Vowel' : 'V', 'Vowel_Dependent' : 'M', 'Vowel_Independent' : 'V', + 'Dotted_Circle' : 'DOTTEDCIRCLE', # Ours, not Unicode's +} + +category_overrides = { + + # The following act more like the Bindus. + 0x0953: 'SM', + 0x0954: 'SM', + + # The following act like consonants. + 0x0A72: 'C', + 0x0A73: 'C', + 0x1CF5: 'C', + 0x1CF6: 'C', + + # TODO: The following should only be allowed after a Visarga. + # For now, just treat them like regular tone marks. 
+ 0x1CE2: 'A', + 0x1CE3: 'A', + 0x1CE4: 'A', + 0x1CE5: 'A', + 0x1CE6: 'A', + 0x1CE7: 'A', + 0x1CE8: 'A', + + # TODO: The following should only be allowed after some of + # the nasalization marks, maybe only for U+1CE9..U+1CF1. + # For now, just treat them like tone marks. + 0x1CED: 'A', + + # The following take marks in standalone clusters, similar to Avagraha. + 0xA8F2: 'Symbol', + 0xA8F3: 'Symbol', + 0xA8F4: 'Symbol', + 0xA8F5: 'Symbol', + 0xA8F6: 'Symbol', + 0xA8F7: 'Symbol', + 0x1CE9: 'Symbol', + 0x1CEA: 'Symbol', + 0x1CEB: 'Symbol', + 0x1CEC: 'Symbol', + 0x1CEE: 'Symbol', + 0x1CEF: 'Symbol', + 0x1CF0: 'Symbol', + 0x1CF1: 'Symbol', + + 0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524 + + # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, + # so the Indic shaper needs to know their categories. + 0x11301: 'SM', + 0x11302: 'SM', + 0x11303: 'SM', + 0x1133B: 'N', + 0x1133C: 'N', + + 0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552 + 0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849 + + 0x0980: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/issues/538 + 0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613 + 0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623 + 0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511 + + 0x2010: 'PLACEHOLDER', + 0x2011: 'PLACEHOLDER', + + 0x25CC: 'DOTTEDCIRCLE', } -new_values0 = {} -for k,v in values[0].items(): - new_values0[category_map[k]] = new_values0.get(category_map[k], 0) + v -values[0] = new_values0 defaults = (category_map[defaults[0]], defaults[1], defaults[2]) new_data = {} for key, (cat, pos, block) in data.items(): cat = category_map[cat] - - - new_data[key] = (cat, pos, block) data = new_data +for k,new_cat in category_overrides.items(): + (cat, pos, block) = data.get(k, defaults) # Keep the code point's own position and block; only the category is overridden. + data[k] = (new_cat, pos, block) - - - - - - - +values = [{_: 1} for _ in defaults] +for vv in 
data.values(): + for i,v in enumerate(vv): + values[i][v] = values[i].get (v, 0) + 1 @@ -184,6 +241,7 @@ print () short = [{ "Coeng": 'Co', "PLACEHOLDER": 'GB', + "DOTTEDCIRCLE": 'DC', },{ "Not_Applicable": 'x', }] diff --git a/src/hb-ot-shaper-indic-table.cc b/src/hb-ot-shaper-indic-table.cc index aa4ab43f7..47b1c570c 100644 --- a/src/hb-ot-shaper-indic-table.cc +++ b/src/hb-ot-shaper-indic-table.cc @@ -23,40 +23,37 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-macros" -#define ISC_A OT_A /* 59 chars; A */ -#define ISC_C OT_C /* 2226 chars; C */ -#define ISC_CM OT_CM /* 196 chars; CM */ -#define ISC_CS OT_CS /* 8 chars; CS */ -#define ISC_Co OT_Coeng /* 12 chars; Coeng */ -#define ISC_H OT_H /* 27 chars; H */ -#define ISC_M OT_M /* 713 chars; M */ -#define ISC_N OT_N /* 74 chars; N */ -#define ISC_GB OT_PLACEHOLDER /* 534 chars; PLACEHOLDER */ -#define ISC_RS OT_RS /* 2 chars; RS */ -#define ISC_R OT_Repha /* 3 chars; Repha */ -#define ISC_SM OT_SM /* 154 chars; SM */ -#define ISC_S OT_Symbol /* 17 chars; Symbol */ -#define ISC_V OT_V /* 516 chars; V */ -#define ISC_X OT_X /* 19 chars; X */ -#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */ -#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */ +#define ISC_A OT_A /* 51 chars; A */ +#define ISC_C OT_C /* 532 chars; C */ +#define ISC_CM OT_CM /* 10 chars; CM */ +#define ISC_CS OT_CS /* 2 chars; CS */ +#define ISC_Co OT_Coeng /* 2 chars; Coeng */ +#define ISC_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */ +#define ISC_H OT_H /* 10 chars; H */ +#define ISC_M OT_M /* 209 chars; M */ +#define ISC_N OT_N /* 35 chars; N */ +#define ISC_GB OT_PLACEHOLDER /* 168 chars; PLACEHOLDER */ +#define ISC_RS OT_RS /* 2 chars; RS */ +#define ISC_R OT_Repha /* 1 chars; Repha */ +#define ISC_SM OT_SM /* 56 chars; SM */ +#define ISC_S OT_Symbol /* 22 chars; Symbol */ +#define ISC_V OT_V /* 190 chars; V */ +#define ISC_X OT_X /* 2 chars; X */ +#define ISC_ZWJ OT_ZWJ /* 1 chars; ZWJ */ +#define ISC_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ 
*/ -#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 352 chars; Bottom */ -#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */ -#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */ -#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */ -#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */ -#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */ -#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */ -#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 290 chars; Right */ -#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 418 chars; Top */ -#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */ -#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */ -#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */ -#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */ -#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */ -#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */ -#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */ +#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 77 chars; Bottom */ +#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 21 chars; Left */ +#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 14 chars; Left_And_Right */ +#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 904 chars; Not_Applicable */ +#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 44 chars; Overstruck */ +#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 98 chars; Right */ +#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 122 chars; Top */ +#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 1 chars; Top_And_Bottom */ +#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 1 chars; Top_And_Bottom_And_Left */ +#define IMC_TL 
INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 3 chars; Top_And_Left */ +#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 3 chars; Top_And_Left_And_Right */ +#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 7 chars; Top_And_Right */ #pragma GCC diagnostic pop @@ -101,16 +98,19 @@ static const uint16_t indic_table[] = { /* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(S,x), _(M,R), _(M,L), /* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T), /* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(H,B), _(M,L), _(M,R), - /* 0950 */ _(X,x), _(A,T), _(A,B), _(X,T), _(X,T), _(M,T), _(M,B), _(M,B), + /* 0950 */ _(X,x), _(A,T), _(A,B), _(SM,O), _(SM,O), _(M,T), _(M,B), _(M,B), /* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), /* 0960 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x), /* 0968 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), /* 0970 */ _(X,x), _(X,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), /* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* Vedic Extensions */ + + /* 0980 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x), + /* Bengali */ - /* 0980 */ _(GB,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x), /* 0988 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(X,x), _(V,x), /* 0990 */ _(V,x), _(X,x), _(X,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x), /* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), @@ -125,7 +125,10 @@ static const uint16_t indic_table[] = { /* 09E0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x), /* 09E8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), /* 09F0 */ _(C,x), _(C,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), - /* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(SM,x), _(X,x), _(SM,T), _(X,x), + + /* Vedic Extensions */ + + /* 09F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(GB,O), _(X,x), _(SM,T), 
_(X,x), /* Gurmukhi */ @@ -139,11 +142,17 @@ static const uint16_t indic_table[] = { /* 0A38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(X,x), _(M,R), _(M,L), /* 0A40 */ _(M,R), _(M,B), _(M,B), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T), /* 0A48 */ _(M,T), _(X,x), _(X,x), _(M,T), _(M,T), _(H,B), _(X,x), _(X,x), - /* 0A50 */ _(X,x), _(A,B), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + + /* Vedic Extensions */ + + /* 0A50 */ _(X,x), _(M,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + + /* Gurmukhi */ + /* 0A58 */ _(X,x), _(C,x), _(C,x), _(C,x), _(C,x), _(X,x), _(C,x), _(X,x), /* 0A60 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(GB,x), _(GB,x), /* 0A68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), - /* 0A70 */ _(SM,T), _(SM,T), _(GB,x), _(GB,x), _(X,x), _(CM,B), _(X,x), _(X,x), + /* 0A70 */ _(SM,T), _(SM,T), _(C,O), _(C,O), _(X,x), _(CM,B), _(X,x), _(X,x), /* 0A78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), /* Gujarati */ @@ -163,7 +172,7 @@ static const uint16_t indic_table[] = { /* 0AE0 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x), /* 0AE8 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), /* 0AF0 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), - /* 0AF8 */ _(X,x), _(C,x), _(A,T), _(A,T), _(A,T), _(N,T), _(N,T), _(N,T), + /* 0AF8 */ _(X,x), _(C,x), _(A,T), _(N,O), _(A,T), _(N,T), _(N,T), _(N,T), /* Oriya */ @@ -177,7 +186,13 @@ static const uint16_t indic_table[] = { /* 0B38 */ _(C,x), _(C,x), _(X,x), _(X,x), _(N,B), _(S,x), _(M,R), _(M,T), /* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(X,x), _(X,x), _(M,L), /* 0B48 */ _(M,TL), _(X,x), _(X,x), _(M,LR),_(M,TLR), _(H,B), _(X,x), _(X,x), - /* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(M,T), _(M,T), _(M,TR), + + /* Vedic Extensions */ + + /* 0B50 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(N,O), _(M,T), _(M,TR), + + /* Oriya */ + /* 0B58 */ _(X,x), _(X,x), _(X,x), _(X,x), 
_(C,x), _(C,x), _(X,x), _(C,x), /* 0B60 */ _(V,x), _(V,x), _(M,B), _(M,B), _(X,x), _(X,x), _(GB,x), _(GB,x), /* 0B68 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), @@ -222,9 +237,12 @@ static const uint16_t indic_table[] = { /* 0C70 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), /* 0C78 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* Vedic Extensions */ + + /* 0C80 */ _(GB,O), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x), + /* Kannada */ - /* 0C80 */ _(SM,x), _(SM,T), _(SM,R), _(SM,R), _(X,x), _(V,x), _(V,x), _(V,x), /* 0C88 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x), /* 0C90 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x), /* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), @@ -243,7 +261,7 @@ static const uint16_t indic_table[] = { /* Malayalam */ - /* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(SM,x), _(V,x), _(V,x), _(V,x), + /* 0D00 */ _(SM,T), _(SM,T), _(SM,R), _(SM,R), _(GB,O), _(V,x), _(V,x), _(V,x), /* 0D08 */ _(V,x), _(V,x), _(V,x), _(V,x), _(V,x), _(X,x), _(V,x), _(V,x), /* 0D10 */ _(V,x), _(X,x), _(V,x), _(V,x), _(V,x), _(C,x), _(C,x), _(C,x), /* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), @@ -331,9 +349,9 @@ static const uint16_t indic_table[] = { /* 1CD0 */ _(A,T), _(A,T), _(A,T), _(X,x), _(A,O), _(A,B), _(A,B), _(A,B), /* 1CD8 */ _(A,B), _(A,B), _(A,T), _(A,T), _(A,B), _(A,B), _(A,B), _(A,B), - /* 1CE0 */ _(A,T), _(A,R), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O), _(X,O), - /* 1CE8 */ _(X,O), _(X,x), _(X,x), _(X,x), _(X,x), _(X,B), _(X,x), _(X,x), - /* 1CF0 */ _(X,x), _(X,x), _(C,x), _(C,x), _(A,T), _(CS,x), _(CS,x), _(A,R), + /* 1CE0 */ _(A,T), _(A,R), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O), _(A,O), + /* 1CE8 */ _(A,O), _(S,O), _(S,O), _(S,O), _(S,O), _(A,O), _(S,O), _(S,O), + /* 1CF0 */ _(S,O), _(S,O), _(C,x), _(C,x), _(A,T), _(C,O), _(C,O), _(A,R), /* 1CF8 */ _(A,x), _(A,x), 
_(GB,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), #define indic_offset_0x2008u 1656 @@ -342,7 +360,10 @@ static const uint16_t indic_table[] = { /* General Punctuation */ /* 2008 */ _(X,x), _(X,x), _(X,x), _(X,x),_(ZWNJ,x),_(ZWJ,x), _(X,x), _(X,x), - /* 2010 */ _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x), + + /* Vedic Extensions */ + + /* 2010 */ _(GB,O), _(GB,O), _(GB,x), _(GB,x), _(GB,x), _(X,x), _(X,x), _(X,x), #define indic_offset_0x2070u 1672 @@ -360,7 +381,7 @@ static const uint16_t indic_table[] = { /* A8E0 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), /* A8E8 */ _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), _(A,T), - /* A8F0 */ _(A,T), _(A,T), _(SM,x), _(SM,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* A8F0 */ _(A,T), _(A,T), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O), _(S,O), /* A8F8 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(V,x), _(M,T), #define indic_offset_0xa9e0u 1728 @@ -383,7 +404,21 @@ static const uint16_t indic_table[] = { /* AA70 */ _(X,x), _(C,x), _(C,x), _(C,x), _(GB,x), _(GB,x), _(GB,x), _(X,x), /* AA78 */ _(X,x), _(X,x), _(C,x), _(N,R), _(N,T), _(N,R), _(C,x), _(C,x), -}; /* Table items: 1792; occupancy: 71% */ +#define indic_offset_0x11300u 1792 + + + /* Vedic Extensions */ + + /* 11300 */ _(X,x), _(SM,O), _(SM,O), _(SM,O), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11308 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11310 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11318 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11320 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11328 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11330 */ _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), _(X,x), + /* 11338 */ _(X,x), _(X,x), _(X,x), _(N,O), _(N,O), _(X,x), _(X,x), _(X,x), + +}; /* Table items: 1856; occupancy: 69% */ uint16_t hb_indic_get_categories (hb_codepoint_t u) 
@@ -404,7 +439,7 @@ hb_indic_get_categories (hb_codepoint_t u) break; case 0x2u: - if (unlikely (u == 0x25CCu)) return _(GB,x); + if (unlikely (u == 0x25CCu)) return _(DC,O); if (hb_in_range (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u]; if (hb_in_range (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u]; break; @@ -415,6 +450,10 @@ hb_indic_get_categories (hb_codepoint_t u) if (hb_in_range (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u]; break; + case 0x11u: + if (hb_in_range (u, 0x11300u, 0x1133Fu)) return indic_table[u - 0x11300u + indic_offset_0x11300u]; + break; + default: break; } @@ -428,6 +467,7 @@ hb_indic_get_categories (hb_codepoint_t u) #undef ISC_CM #undef ISC_CS #undef ISC_Co +#undef ISC_DC #undef ISC_H #undef ISC_M #undef ISC_N @@ -442,8 +482,6 @@ hb_indic_get_categories (hb_codepoint_t u) #undef ISC_ZWNJ #undef IMC_B -#undef IMC_BL -#undef IMC_BR #undef IMC_L #undef IMC_LR #undef IMC_x @@ -452,11 +490,9 @@ hb_indic_get_categories (hb_codepoint_t u) #undef IMC_T #undef IMC_TB #undef IMC_TBL -#undef IMC_TBR #undef IMC_TL #undef IMC_TLR #undef IMC_TR -#undef IMC_VOL #endif diff --git a/src/hb-ot-shaper-indic.hh b/src/hb-ot-shaper-indic.hh index 9e8e32dd0..babd58dd4 100644 --- a/src/hb-ot-shaper-indic.hh +++ b/src/hb-ot-shaper-indic.hh @@ -265,63 +265,16 @@ set_indic_properties (hb_glyph_info_t &info) indic_category_t cat = (indic_category_t) (type & 0xFFu); indic_position_t pos = (indic_position_t) (type >> 8); - - /* - * Re-assign category - */ - - /* The following act more like the Bindus. */ - if (unlikely (hb_in_range (u, 0x0953u, 0x0954u))) - cat = OT_SM; - /* The following act like consonants. */ - else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u, - 0x1CF5u, 0x1CF6u))) - cat = OT_C; - /* TODO: The following should only be allowed after a Visarga. - * For now, just treat them like regular tone marks. 
*/ - else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u))) - cat = OT_A; - /* TODO: The following should only be allowed after some of - * the nasalization marks, maybe only for U+1CE9..U+1CF1. - * For now, just treat them like tone marks. */ - else if (unlikely (u == 0x1CEDu)) - cat = OT_A; - /* The following take marks in standalone clusters, similar to Avagraha. */ - else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u, - 0x1CE9u, 0x1CECu, - 0x1CEEu, 0x1CF1u))) - { - cat = OT_Symbol; - //static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); - } - else if (unlikely (u == 0x0A51u)) - { - /* https://github.com/harfbuzz/harfbuzz/issues/524 */ - cat = OT_M; - pos = POS_BELOW_C; - } - - /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, - * so the Indic shaper needs to know their categories. */ - else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; - else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N; - - else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ - else if (unlikely (u == 0x0B55u)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/2849 */ - - else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ - else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */ - else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ - else if (unlikely (u == 0x0D04u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/3511 */ - else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) - cat = OT_PLACEHOLDER; - else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; - - /* * Re-assign position. 
*/ + if (unlikely (u == 0x0A51u)) + { + /* https://github.com/harfbuzz/harfbuzz/issues/524 */ + pos = POS_BELOW_C; + } + if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) { pos = POS_BASE_C; @@ -340,7 +293,6 @@ set_indic_properties (hb_glyph_info_t &info) if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ - info.indic_category() = cat; info.indic_position() = pos; }