diff --git a/src/Makefile.sources b/src/Makefile.sources index e4c31a668..5ec46bac1 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -144,9 +144,7 @@ HB_BASE_sources = \ hb-ot-shaper-indic.cc \ hb-ot-shaper-indic.hh \ hb-ot-shaper-khmer.cc \ - hb-ot-shaper-khmer.hh \ hb-ot-shaper-myanmar.cc \ - hb-ot-shaper-myanmar.hh \ hb-ot-shaper-syllabic.cc \ hb-ot-shaper-syllabic.hh \ hb-ot-shaper-thai.cc \ diff --git a/src/gen-indic-table.py b/src/gen-indic-table.py index 7e38ee1d1..ae1efb4fb 100755 --- a/src/gen-indic-table.py +++ b/src/gen-indic-table.py @@ -42,7 +42,6 @@ files = [open (x, encoding='utf-8') for x in sys.argv[1:]] headers = [[f.readline () for i in range (2)] for f in files] data = [{} for _ in files] -values = [{} for _ in files] for i, f in enumerate (files): for line in f: @@ -65,12 +64,9 @@ for i, f in enumerate (files): for u in range (start, end + 1): data[i][u] = t - values[i][t] = values[i].get (t, 0) + end - start + 1 # Merge data into one dict: defaults = ('Other', 'Not_Applicable', 'No_Block') -for i,v in enumerate (defaults): - values[i][v] = values[i].get (v, 0) + 1 combined = {} for i,d in enumerate (data): for u,v in d.items (): @@ -83,6 +79,358 @@ combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in A data = combined del combined + +# Convert categories & positions types + +category_map = { + 'Other' : 'X', + 'Avagraha' : 'Symbol', + 'Bindu' : 'SM', + 'Brahmi_Joining_Number' : 'PLACEHOLDER', # Don't care. + 'Cantillation_Mark' : 'A', + 'Consonant' : 'C', + 'Consonant_Dead' : 'C', + 'Consonant_Final' : 'CM', + 'Consonant_Head_Letter' : 'C', + 'Consonant_Initial_Postfixed' : 'C', # TODO + 'Consonant_Killer' : 'M', # U+17CD only. + 'Consonant_Medial' : 'CM', + 'Consonant_Placeholder' : 'PLACEHOLDER', + 'Consonant_Preceding_Repha' : 'Repha', + 'Consonant_Prefixed' : 'X', # Don't care. 
+ 'Consonant_Subjoined' : 'CM', + 'Consonant_Succeeding_Repha' : 'CM', + 'Consonant_With_Stacker' : 'CS', + 'Gemination_Mark' : 'SM', # https://github.com/harfbuzz/harfbuzz/issues/552 + 'Invisible_Stacker' : 'Coeng', + 'Joiner' : 'ZWJ', + 'Modifying_Letter' : 'X', + 'Non_Joiner' : 'ZWNJ', + 'Nukta' : 'N', + 'Number' : 'PLACEHOLDER', + 'Number_Joiner' : 'PLACEHOLDER', # Don't care. + 'Pure_Killer' : 'M', # Is like a vowel matra. + 'Register_Shifter' : 'RS', + 'Syllable_Modifier' : 'SM', + 'Tone_Letter' : 'X', + 'Tone_Mark' : 'N', + 'Virama' : 'H', + 'Visarga' : 'SM', + 'Vowel' : 'V', + 'Vowel_Dependent' : 'M', + 'Vowel_Independent' : 'V', + 'Dotted_Circle' : 'DOTTEDCIRCLE', # Ours, not Unicode's + 'Ra' : 'Ra', # Ours, not Unicode's +} +position_map = { + 'Not_Applicable' : 'END', + + 'Left' : 'PRE_C', + 'Top' : 'ABOVE_C', + 'Bottom' : 'BELOW_C', + 'Right' : 'POST_C', + + # These should resolve to the position of the last part of the split sequence. + 'Bottom_And_Right' : 'POST_C', + 'Left_And_Right' : 'POST_C', + 'Top_And_Bottom' : 'BELOW_C', + 'Top_And_Bottom_And_Left' : 'BELOW_C', + 'Top_And_Bottom_And_Right' : 'POST_C', + 'Top_And_Left' : 'ABOVE_C', + 'Top_And_Left_And_Right' : 'POST_C', + 'Top_And_Right' : 'POST_C', + + 'Overstruck' : 'AFTER_MAIN', + 'Visual_order_left' : 'PRE_M', +} + +category_overrides = { + + # These are the variation-selectors. 
They only appear in the Myanmar grammar + # but are not Myanmar-specific + 0xFE00: 'VS', + 0xFE01: 'VS', + 0xFE02: 'VS', + 0xFE03: 'VS', + 0xFE04: 'VS', + 0xFE05: 'VS', + 0xFE06: 'VS', + 0xFE07: 'VS', + 0xFE08: 'VS', + 0xFE09: 'VS', + 0xFE0A: 'VS', + 0xFE0B: 'VS', + 0xFE0C: 'VS', + 0xFE0D: 'VS', + 0xFE0E: 'VS', + 0xFE0F: 'VS', + + # These appear in the OT Myanmar spec, but are not Myanmar-specific + 0x2015: 'PLACEHOLDER', + 0x2022: 'PLACEHOLDER', + 0x25FB: 'PLACEHOLDER', + 0x25FC: 'PLACEHOLDER', + 0x25FD: 'PLACEHOLDER', + 0x25FE: 'PLACEHOLDER', + + + # Indic + + 0x0930: 'Ra', # Devanagari + 0x09B0: 'Ra', # Bengali + 0x09F0: 'Ra', # Bengali + 0x0A30: 'Ra', # Gurmukhi No Reph + 0x0AB0: 'Ra', # Gujarati + 0x0B30: 'Ra', # Oriya + 0x0BB0: 'Ra', # Tamil No Reph + 0x0C30: 'Ra', # Telugu Reph formed only with ZWJ + 0x0CB0: 'Ra', # Kannada + 0x0D30: 'Ra', # Malayalam No Reph, Logical Repha + 0x0DBB: 'Ra', # Sinhala Reph formed only with ZWJ + + # The following act more like the Bindus. + 0x0953: 'SM', + 0x0954: 'SM', + + # The following act like consonants. + 0x0A72: 'C', + 0x0A73: 'C', + 0x1CF5: 'C', + 0x1CF6: 'C', + + # TODO: The following should only be allowed after a Visarga. + # For now, just treat them like regular tone marks. + 0x1CE2: 'A', + 0x1CE3: 'A', + 0x1CE4: 'A', + 0x1CE5: 'A', + 0x1CE6: 'A', + 0x1CE7: 'A', + 0x1CE8: 'A', + + # TODO: The following should only be allowed after some of + # the nasalization marks, maybe only for U+1CE9..U+1CF1. + # For now, just treat them like tone marks. + 0x1CED: 'A', + + # The following take marks in standalone clusters, similar to Avagraha. 
+ 0xA8F2: 'Symbol', + 0xA8F3: 'Symbol', + 0xA8F4: 'Symbol', + 0xA8F5: 'Symbol', + 0xA8F6: 'Symbol', + 0xA8F7: 'Symbol', + 0x1CE9: 'Symbol', + 0x1CEA: 'Symbol', + 0x1CEB: 'Symbol', + 0x1CEC: 'Symbol', + 0x1CEE: 'Symbol', + 0x1CEF: 'Symbol', + 0x1CF0: 'Symbol', + 0x1CF1: 'Symbol', + + 0x0A51: 'M', # https://github.com/harfbuzz/harfbuzz/issues/524 + + # According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, + # so the Indic shaper needs to know their categories. + 0x11301: 'SM', + 0x11302: 'SM', + 0x11303: 'SM', + 0x1133B: 'N', + 0x1133C: 'N', + + 0x0AFB: 'N', # https://github.com/harfbuzz/harfbuzz/issues/552 + 0x0B55: 'N', # https://github.com/harfbuzz/harfbuzz/issues/2849 + + 0x09FC: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/1613 + 0x0C80: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/623 + 0x0D04: 'PLACEHOLDER', # https://github.com/harfbuzz/harfbuzz/pull/3511 + + 0x25CC: 'DOTTEDCIRCLE', + + + # Khmer + + 0x179A: 'Ra', + + 0x17CC: 'Robatic', + 0x17C9: 'Robatic', + 0x17CA: 'Robatic', + + 0x17C6: 'Xgroup', + 0x17CB: 'Xgroup', + 0x17CD: 'Xgroup', + 0x17CE: 'Xgroup', + 0x17CF: 'Xgroup', + 0x17D0: 'Xgroup', + 0x17D1: 'Xgroup', + + 0x17C7: 'Ygroup', + 0x17C8: 'Ygroup', + 0x17DD: 'Ygroup', + 0x17D3: 'Ygroup', # Just guessing. Uniscribe doesn't categorize it. + + + # Myanmar + + # https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze + + 0x104E: 'C', # The spec says C, IndicSyllableCategory says Consonant_Placeholder + + 0x1004: 'Ra', + 0x101B: 'Ra', + 0x105A: 'Ra', + + 0x1032: 'A', + 0x1036: 'A', + + 0x103A: 'As', + + #0x1040: 'D0', # XXX The spec says D0, but Uniscribe doesn't seem to do. 
+ + 0x103E: 'MH', + 0x1060: 'ML', + 0x103C: 'MR', + 0x103D: 'MW', + 0x1082: 'MW', + 0x103B: 'MY', + 0x105E: 'MY', + 0x105F: 'MY', + + 0x1063: 'PT', + 0x1064: 'PT', + 0x1069: 'PT', + 0x106A: 'PT', + 0x106B: 'PT', + 0x106C: 'PT', + 0x106D: 'PT', + 0xAA7B: 'PT', + + 0x1038: 'SM', + 0x1087: 'SM', + 0x1088: 'SM', + 0x1089: 'SM', + 0x108A: 'SM', + 0x108B: 'SM', + 0x108C: 'SM', + 0x108D: 'SM', + 0x108F: 'SM', + 0x109A: 'SM', + 0x109B: 'SM', + 0x109C: 'SM', + + 0x104A: 'P', + 0x104B: 'P', +} +position_overrides = { + + 0x0A51: 'BELOW_C', # https://github.com/harfbuzz/harfbuzz/issues/524 + + 0x0B01: 'BEFORE_SUB', # Oriya Bindu is BeforeSub in the spec. +} + +def matra_pos_left(u, block): + return "PRE_M" +def matra_pos_right(u, block): + if block == 'Devanagari': return 'AFTER_SUB' + if block == 'Bengali': return 'AFTER_POST' + if block == 'Gurmukhi': return 'AFTER_POST' + if block == 'Gujarati': return 'AFTER_POST' + if block == 'Oriya': return 'AFTER_POST' + if block == 'Tamil': return 'AFTER_POST' + if block == 'Telugu': return 'BEFORE_SUB' if u <= 0x0C42 else 'AFTER_SUB' + if block == 'Kannada': return 'BEFORE_SUB' if u < 0x0CC3 or u > 0x0CD6 else 'AFTER_SUB' + if block == 'Malayalam': return 'AFTER_POST' + if block == 'Sinhala': return 'AFTER_SUB' + return 'AFTER_SUB' +def matra_pos_top(u, block): + # BENG and MLYM don't have top matras. 
+ if block == 'Devanagari': return 'AFTER_SUB' + if block == 'Gurmukhi': return 'AFTER_POST' # Deviate from spec + if block == 'Gujarati': return 'AFTER_SUB' + if block == 'Oriya': return 'AFTER_MAIN' + if block == 'Tamil': return 'AFTER_SUB' + if block == 'Telugu': return 'BEFORE_SUB' + if block == 'Kannada': return 'BEFORE_SUB' + if block == 'Sinhala': return 'AFTER_SUB' + return 'AFTER_SUB' +def matra_pos_bottom(u, block): + if block == 'Devanagari': return 'AFTER_SUB' + if block == 'Bengali': return 'AFTER_SUB' + if block == 'Gurmukhi': return 'AFTER_POST' + if block == 'Gujarati': return 'AFTER_POST' + if block == 'Oriya': return 'AFTER_SUB' + if block == 'Tamil': return 'AFTER_POST' + if block == 'Telugu': return 'BEFORE_SUB' + if block == 'Kannada': return 'BEFORE_SUB' + if block == 'Malayalam': return 'AFTER_POST' + if block == 'Sinhala': return 'AFTER_SUB' + return "AFTER_SUB" +def indic_matra_position(u, pos, block): # Reposition matra + if pos == 'PRE_C': return matra_pos_left(u, block) + if pos == 'POST_C': return matra_pos_right(u, block) + if pos == 'ABOVE_C': return matra_pos_top(u, block) + if pos == 'BELOW_C': return matra_pos_bottom(u, block) + assert (False) + +def position_to_category(pos): + if pos == 'PRE_C': return 'VPre' + if pos == 'ABOVE_C': return 'VAbv' + if pos == 'BELOW_C': return 'VBlw' + if pos == 'POST_C': return 'VPst' + assert(False) + + +defaults = (category_map[defaults[0]], position_map[defaults[1]], defaults[2]) + +new_data = {} +for k, (cat, pos, block) in data.items(): + cat = category_map[cat] + pos = position_map[pos] + new_data[k] = (cat, pos, block) +data = new_data + +for k,new_cat in category_overrides.items(): + (cat, pos, block) = data.get(k, defaults) + data[k] = (new_cat, pos, block) + +# We only expect position for certain types +positioned_categories = ('CM', 'SM', 'RS', 'H', 'M') +for k, (cat, pos, block) in data.items(): + if cat not in positioned_categories: + pos = 'END' + data[k] = (cat, pos, block) + +# 
Position overrides are more complicated + +# Keep in sync with CONSONANT_FLAGS in the shaper +consonant_categories = ('C', 'CS', 'Ra','CM', 'V', 'PLACEHOLDER', 'DOTTEDCIRCLE') +smvd_categories = ('SM', 'VD', 'A', 'Symbol') +for k, (cat, pos, block) in data.items(): + if cat in consonant_categories: + pos = 'BASE_C' + elif cat == 'M': + if block.startswith('Khmer') or block.startswith('Myanmar'): + cat = position_to_category(pos) + else: + pos = indic_matra_position(k, pos, block) + elif cat in smvd_categories: + pos = 'SMVD' + data[k] = (cat, pos, block) + +for k,new_pos in position_overrides.items(): + (cat, pos, block) = data.get(k, defaults) + data[k] = (cat, new_pos, block) + + +values = [{_: 1} for _ in defaults] +for vv in data.values(): + for i,v in enumerate(vv): + values[i][v] = values[i].get (v, 0) + 1 + + + + # Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out singles = {} for u in ALLOWED_SINGLES: @@ -111,30 +459,40 @@ print () # Shorten values short = [{ - "Bindu": 'Bi', - "Cantillation_Mark": 'Ca', - "Joiner": 'ZWJ', - "Non_Joiner": 'ZWNJ', - "Number": 'Nd', - "Visarga": 'Vs', - "Vowel": 'Vo', - "Vowel_Dependent": 'M', - "Consonant_Prefixed": 'CPrf', - "Other": 'x', + "Repha": 'Rf', + "Coeng": 'Co', + "PLACEHOLDER": 'GB', + "DOTTEDCIRCLE": 'DC', + "VPst": 'VR', + "VPre": 'VL', + "Robatic": 'Rt', + "Xgroup": 'Xg', + "Ygroup": 'Yg', + "As": 'As', },{ - "Not_Applicable": 'x', + "END": 'X', + "BASE_C": 'C', + "ABOVE_C": 'T', + "BELOW_C": 'B', + "POST_C": 'R', + "PRE_C": 'L', + "PRE_M": 'LM', + "AFTER_MAIN": 'A', + "AFTER_SUB": 'AS', + "BEFORE_SUB": 'BS', + "AFTER_POST": 'AP', + "SMVD": 'SM', }] all_shorts = [{},{}] # Add some of the values, to make them more readable, and to avoid duplicates - for i in range (2): for v,s in short[i].items (): all_shorts[i][s] = v -what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"] -what_short = ["ISC", "IMC"] +what = ["OT", "POS"] +what_short = ["_OT", "_POS"] print ('#pragma GCC diagnostic push') print 
('#pragma GCC diagnostic ignored "-Wunused-macros"') cat_defs = [] @@ -150,7 +508,7 @@ for i in range (2): raise Exception ("Duplicate short value alias", v, all_shorts[i][s]) all_shorts[i][s] = v short[i][v] = s - cat_defs.append ((what_short[i] + '_' + s, what[i] + '_' + v.upper (), str (values[i][v]), v)) + cat_defs.append ((what_short[i] + '_' + s, what[i] + '_' + (v.upper () if i else v), str (values[i][v]), v)) maxlen_s = max ([len (c[0]) for c in cat_defs]) maxlen_l = max ([len (c[1]) for c in cat_defs]) @@ -163,7 +521,9 @@ for s in what_short: print () print ('#pragma GCC diagnostic pop') print () -print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)") +print ("#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8))") +print () +print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (%s_##S, %s_##M)" % tuple(what_short)) print () print () @@ -254,10 +614,11 @@ for p in sorted(pages): print (" default:") print (" break;") print (" }") -print (" return _(x,x);") +print (" return _(X,X);") print ("}") print () print ("#undef _") +print ("#undef INDIC_COMBINE_CATEGORIES") for i in range (2): print () vv = sorted (values[i].keys ()) diff --git a/src/hb-ot-shaper-indic-machine.hh b/src/hb-ot-shaper-indic-machine.hh index b7458c842..0408ed961 100644 --- a/src/hb-ot-shaper-indic-machine.hh +++ b/src/hb-ot-shaper-indic-machine.hh @@ -31,6 +31,14 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +using indic_category_t = ot_category_t; +using indic_position_t = ot_position_t; + +#define I_Cat(Cat) indic_syllable_machine_ex_##Cat + enum indic_syllable_type_t { indic_consonant_syllable, indic_vowel_syllable, @@ -41,7 +49,7 @@ enum indic_syllable_type_t { }; -#line 45 "hb-ot-shaper-indic-machine.hh" +#line 53 "hb-ot-shaper-indic-machine.hh" #define indic_syllable_machine_ex_A 9u #define indic_syllable_machine_ex_C 1u #define indic_syllable_machine_ex_CM 16u @@ -62,7 +70,7 @@ enum indic_syllable_type_t { #define 
indic_syllable_machine_ex_ZWNJ 5u -#line 66 "hb-ot-shaper-indic-machine.hh" +#line 74 "hb-ot-shaper-indic-machine.hh" static const unsigned char _indic_syllable_machine_trans_keys[] = { 8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, 4u, 8u, 4u, 12u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 15u, 15u, @@ -401,11 +409,11 @@ static const int indic_syllable_machine_error = -1; static const int indic_syllable_machine_en_main = 39; -#line 46 "hb-ot-shaper-indic-machine.rl" +#line 54 "hb-ot-shaper-indic-machine.rl" -#line 103 "hb-ot-shaper-indic-machine.rl" +#line 115 "hb-ot-shaper-indic-machine.rl" #define found_syllable(syllable_type) \ @@ -424,7 +432,7 @@ find_syllables_indic (hb_buffer_t *buffer) int cs; hb_glyph_info_t *info = buffer->info; -#line 428 "hb-ot-shaper-indic-machine.hh" +#line 436 "hb-ot-shaper-indic-machine.hh" { cs = indic_syllable_machine_start; ts = 0; @@ -432,7 +440,7 @@ find_syllables_indic (hb_buffer_t *buffer) act = 0; } -#line 123 "hb-ot-shaper-indic-machine.rl" +#line 135 "hb-ot-shaper-indic-machine.rl" p = 0; @@ -440,7 +448,7 @@ find_syllables_indic (hb_buffer_t *buffer) unsigned int syllable_serial = 1; -#line 444 "hb-ot-shaper-indic-machine.hh" +#line 452 "hb-ot-shaper-indic-machine.hh" { int _slen; int _trans; @@ -454,7 +462,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 458 "hb-ot-shaper-indic-machine.hh" +#line 466 "hb-ot-shaper-indic-machine.hh" } _keys = _indic_syllable_machine_trans_keys + (cs<<1); @@ -477,51 +485,51 @@ _eof_trans: {te = p+1;} break; case 11: -#line 99 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {te = p+1;{ found_syllable (indic_non_indic_cluster); }} break; case 13: -#line 94 "hb-ot-shaper-indic-machine.rl" +#line 106 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_consonant_syllable); }} break; case 14: -#line 95 "hb-ot-shaper-indic-machine.rl" +#line 107 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_vowel_syllable); }} 
break; case 17: -#line 96 "hb-ot-shaper-indic-machine.rl" +#line 108 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_standalone_cluster); }} break; case 19: -#line 97 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_symbol_cluster); }} break; case 15: -#line 98 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 16: -#line 99 "hb-ot-shaper-indic-machine.rl" +#line 111 "hb-ot-shaper-indic-machine.rl" {te = p;p--;{ found_syllable (indic_non_indic_cluster); }} break; case 1: -#line 94 "hb-ot-shaper-indic-machine.rl" +#line 106 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_consonant_syllable); }} break; case 3: -#line 95 "hb-ot-shaper-indic-machine.rl" +#line 107 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_vowel_syllable); }} break; case 7: -#line 96 "hb-ot-shaper-indic-machine.rl" +#line 108 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_standalone_cluster); }} break; case 8: -#line 97 "hb-ot-shaper-indic-machine.rl" +#line 109 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_symbol_cluster); }} break; case 4: -#line 98 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {{p = ((te))-1;}{ found_syllable (indic_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 6: @@ -542,22 +550,22 @@ _eof_trans: case 18: #line 1 "NONE" {te = p+1;} -#line 94 "hb-ot-shaper-indic-machine.rl" +#line 106 "hb-ot-shaper-indic-machine.rl" {act = 1;} break; case 5: #line 1 "NONE" {te = p+1;} -#line 98 "hb-ot-shaper-indic-machine.rl" +#line 110 "hb-ot-shaper-indic-machine.rl" {act = 5;} break; case 12: #line 1 "NONE" {te = p+1;} -#line 99 "hb-ot-shaper-indic-machine.rl" +#line 111 
"hb-ot-shaper-indic-machine.rl" {act = 6;} break; -#line 561 "hb-ot-shaper-indic-machine.hh" +#line 569 "hb-ot-shaper-indic-machine.hh" } _again: @@ -566,7 +574,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 570 "hb-ot-shaper-indic-machine.hh" +#line 578 "hb-ot-shaper-indic-machine.hh" } if ( ++p != pe ) @@ -582,7 +590,7 @@ _again: } -#line 131 "hb-ot-shaper-indic-machine.rl" +#line 143 "hb-ot-shaper-indic-machine.rl" } diff --git a/src/hb-ot-shaper-indic-machine.rl b/src/hb-ot-shaper-indic-machine.rl index 4a7f64e2f..c810114d9 100644 --- a/src/hb-ot-shaper-indic-machine.rl +++ b/src/hb-ot-shaper-indic-machine.rl @@ -29,6 +29,14 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +using indic_category_t = ot_category_t; +using indic_position_t = ot_position_t; + +#define I_Cat(Cat) indic_syllable_machine_ex_##Cat + enum indic_syllable_type_t { indic_consonant_syllable, indic_vowel_syllable, @@ -47,6 +55,9 @@ enum indic_syllable_type_t { %%{ + +# These values are replicated from indic.hh, and relisted in indic.cc; keep in sync. + export C = 1; export V = 2; export N = 3; @@ -66,6 +77,7 @@ export CM = 16; export Symbol= 17; export CS = 18; + c = (C | Ra); # is_consonant n = ((ZWNJ?.RS)? 
(N.N?)?); # is_consonant_modifier z = ZWJ|ZWNJ; # is_joiner diff --git a/src/hb-ot-shaper-indic-table.cc b/src/hb-ot-shaper-indic-table.cc index 213523745..c4f99b1bf 100644 --- a/src/hb-ot-shaper-indic-table.cc +++ b/src/hb-ot-shaper-indic-table.cc @@ -23,63 +23,59 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-macros" -#define ISC_A INDIC_SYLLABIC_CATEGORY_AVAGRAHA /* 17 chars; Avagraha */ -#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 91 chars; Bindu */ -#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */ -#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 59 chars; Cantillation_Mark */ -#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2206 chars; Consonant */ -#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 14 chars; Consonant_Dead */ -#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 70 chars; Consonant_Final */ -#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */ -#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */ -#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */ -#define ISC_CM INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL /* 31 chars; Consonant_Medial */ -#define ISC_CP INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER /* 22 chars; Consonant_Placeholder */ -#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 3 chars; Consonant_Preceding_Repha */ -#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 10 chars; Consonant_Prefixed */ -#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 94 chars; Consonant_Subjoined */ -#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 1 chars; Consonant_Succeeding_Repha */ -#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 8 chars; Consonant_With_Stacker */ -#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 
chars; Gemination_Mark */ -#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 12 chars; Invisible_Stacker */ -#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */ -#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */ -#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */ -#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 32 chars; Nukta */ -#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 491 chars; Number */ -#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */ -#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */ -#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 25 chars; Pure_Killer */ -#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */ -#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */ -#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */ -#define ISC_TM INDIC_SYLLABIC_CATEGORY_TONE_MARK /* 42 chars; Tone_Mark */ -#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 27 chars; Virama */ -#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 35 chars; Visarga */ -#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */ -#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 686 chars; Vowel_Dependent */ -#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 486 chars; Vowel_Independent */ +#define _OT_A OT_A /* 53 chars; A */ +#define _OT_As OT_As /* 1 chars; As */ +#define _OT_C OT_C /* 518 chars; C */ +#define _OT_CM OT_CM /* 1 chars; CM */ +#define _OT_CS OT_CS /* 2 chars; CS */ +#define _OT_Co OT_Coeng /* 2 chars; Coeng */ +#define _OT_DC OT_DOTTEDCIRCLE /* 1 chars; DOTTEDCIRCLE */ +#define _OT_H OT_H /* 10 chars; H */ +#define _OT_M OT_M /* 160 chars; M */ +#define _OT_MH OT_MH /* 1 chars; MH */ +#define _OT_ML OT_ML /* 1 chars; ML */ +#define _OT_MR OT_MR /* 1 chars; MR */ +#define _OT_MW OT_MW /* 2 chars; MW */ +#define 
_OT_MY OT_MY /* 3 chars; MY */ +#define _OT_N OT_N /* 17 chars; N */ +#define _OT_P OT_P /* 2 chars; P */ +#define _OT_GB OT_PLACEHOLDER /* 172 chars; PLACEHOLDER */ +#define _OT_PT OT_PT /* 8 chars; PT */ +#define _OT_R OT_Ra /* 15 chars; Ra */ +#define _OT_Rf OT_Repha /* 1 chars; Repha */ +#define _OT_Rt OT_Robatic /* 3 chars; Robatic */ +#define _OT_SM OT_SM /* 58 chars; SM */ +#define _OT_S OT_Symbol /* 22 chars; Symbol */ +#define _OT_V OT_V /* 190 chars; V */ +#define _OT_VA OT_VAbv /* 18 chars; VAbv */ +#define _OT_VB OT_VBlw /* 7 chars; VBlw */ +#define _OT_VL OT_VPre /* 5 chars; VPre */ +#define _OT_VR OT_VPst /* 13 chars; VPst */ +#define _OT_VS OT_VS /* 16 chars; VS */ +#define _OT_X OT_X /* 2 chars; X */ +#define _OT_Xg OT_Xgroup /* 7 chars; Xgroup */ +#define _OT_Yg OT_Ygroup /* 4 chars; Ygroup */ +#define _OT_ZWJ OT_ZWJ /* 1 chars; ZWJ */ +#define _OT_ZWNJ OT_ZWNJ /* 1 chars; ZWNJ */ -#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 352 chars; Bottom */ -#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */ -#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */ -#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */ -#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */ -#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */ -#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */ -#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 290 chars; Right */ -#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 418 chars; Top */ -#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */ -#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */ -#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */ -#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */ -#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT 
/* 4 chars; Top_And_Left_And_Right */ -#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */ -#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */ +#define _POS_T POS_ABOVE_C /* 23 chars; ABOVE_C */ +#define _POS_A POS_AFTER_MAIN /* 3 chars; AFTER_MAIN */ +#define _POS_AP POS_AFTER_POST /* 50 chars; AFTER_POST */ +#define _POS_AS POS_AFTER_SUB /* 60 chars; AFTER_SUB */ +#define _POS_C POS_BASE_C /* 899 chars; BASE_C */ +#define _POS_BS POS_BEFORE_SUB /* 31 chars; BEFORE_SUB */ +#define _POS_B POS_BELOW_C /* 13 chars; BELOW_C */ +#define _POS_X POS_END /* 73 chars; END */ +#define _POS_R POS_POST_C /* 13 chars; POST_C */ +#define _POS_L POS_PRE_C /* 5 chars; PRE_C */ +#define _POS_LM POS_PRE_M /* 16 chars; PRE_M */ +#define _POS_SM POS_SMVD /* 132 chars; SMVD */ #pragma GCC diagnostic pop -#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M) +#define INDIC_COMBINE_CATEGORIES(S,M) ((S) | ((M) << 8)) + +#define _(S,M) INDIC_COMBINE_CATEGORIES (_OT_##S, _POS_##M) static const uint16_t indic_table[] = { @@ -90,319 +86,356 @@ static const uint16_t indic_table[] = { /* Basic Latin */ - /* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x), - /* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0028 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), + /* 0030 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0038 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), #define indic_offset_0x00b0u 24 /* Latin-1 Supplement */ - /* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), 
_(x,x), _(x,x), - /* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), + /* 00B0 */ _(X,X), _(X,X),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X), + /* 00B8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 00C0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 00C8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 00D0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), #define indic_offset_0x0900u 64 /* Devanagari */ - /* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L), - /* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T), - /* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R), - /* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B), - /* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0900 */_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM), _(V,C), _(V,C), _(V,C), _(V,C), + /* 0908 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 0910 */ _(V,C), _(V,C), _(V,C), 
_(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0918 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0920 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0928 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0930 */ _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0938 */ _(C,C), _(C,C), _(M,AS), _(M,AS), _(N,X), _(S,SM), _(M,AS), _(M,LM), + /* 0940 */ _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), + /* 0948 */ _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(H,B), _(M,LM), _(M,AS), + /* 0950 */ _(X,X), _(A,SM), _(A,SM),_(SM,SM),_(SM,SM), _(M,AS), _(M,AS), _(M,AS), + /* 0958 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0960 */ _(V,C), _(V,C), _(M,AS), _(M,AS), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0968 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0970 */ _(X,X), _(X,X), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 0978 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), /* Bengali */ - /* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), - /* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), - /* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), - /* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), - /* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x), - /* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), - /* 09D8 */ _(x,x), _(x,x), _(x,x), 
_(x,x), _(C,x), _(C,x), _(x,x), _(C,x), - /* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x), + /* 0980 */ _(GB,C),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0988 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(X,X), _(V,C), + /* 0990 */ _(V,C), _(X,X), _(X,X), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0998 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 09A0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 09A8 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 09B0 */ _(R,C), _(X,X), _(C,C), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), + /* 09B8 */ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(S,SM), _(M,AP), _(M,LM), + /* 09C0 */ _(M,AP), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(X,X), _(M,LM), + /* 09C8 */ _(M,LM), _(X,X), _(X,X), _(M,AP), _(M,AP), _(H,B), _(C,C), _(X,X), + /* 09D0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AP), + /* 09D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), + /* 09E0 */ _(V,C), _(V,C), _(M,AS), _(M,AS), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 09E8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 09F0 */ _(R,C), _(C,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 09F8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(X,X),_(SM,SM), _(X,X), /* Gurmukhi */ - /* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), - /* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0A20 */ _(C,x), _(C,x), _(C,x), 
_(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), - /* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L), - /* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), - /* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x), - /* 0A50 */ _(x,x), _(Ca,B), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), - /* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x), - /* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0A00 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0A08 */ _(V,C), _(V,C), _(V,C), _(X,X), _(X,X), _(X,X), _(X,X), _(V,C), + /* 0A10 */ _(V,C), _(X,X), _(X,X), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0A18 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0A20 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0A28 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0A30 */ _(R,C), _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(X,X), + /* 0A38 */ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(X,X), _(M,AP), _(M,LM), + /* 0A40 */ _(M,AP), _(M,AP), _(M,AP), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AP), + /* 0A48 */ _(M,AP), _(X,X), _(X,X), _(M,AP), _(M,AP), _(H,B), _(X,X), _(X,X), + /* 0A50 */ _(X,X), _(M,B), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0A58 */ _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X), _(C,C), _(X,X), + /* 0A60 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0A68 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), 
_(GB,C), + /* 0A70 */_(SM,SM),_(SM,SM), _(C,C), _(C,C), _(X,X), _(CM,C), _(X,X), _(X,X), + /* 0A78 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* Gujarati */ - /* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), - /* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), - /* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), - /* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T), - /* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x), - /* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T), + /* 0A80 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0A88 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(V,C), + /* 0A90 */ _(V,C), _(V,C), _(X,X), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0A98 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0AA0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0AA8 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0AB0 */ _(R,C), _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), + /* 0AB8 
*/ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(S,SM), _(M,AP), _(M,LM), + /* 0AC0 */ _(M,AP), _(M,AP), _(M,AP), _(M,AP), _(M,AP), _(M,AS), _(X,X), _(M,AS), + /* 0AC8 */ _(M,AS), _(M,AP), _(X,X), _(M,AP), _(M,AP), _(H,B), _(X,X), _(X,X), + /* 0AD0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0AD8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0AE0 */ _(V,C), _(V,C), _(M,AP), _(M,AP), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0AE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0AF0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0AF8 */ _(X,X), _(C,C), _(A,SM), _(N,X), _(A,SM), _(N,X), _(N,X), _(N,X), /* Oriya */ - /* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), - /* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), - /* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), - /* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), - /* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x), - /* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,T), _(M,TR), - /* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), - /* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0B00 */ 
_(X,X),_(SM,BS),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0B08 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(X,X), _(V,C), + /* 0B10 */ _(V,C), _(X,X), _(X,X), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0B18 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0B20 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0B28 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0B30 */ _(R,C), _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), + /* 0B38 */ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(S,SM), _(M,AP), _(M,A), + /* 0B40 */ _(M,AP), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(X,X), _(M,LM), + /* 0B48 */ _(M,A), _(X,X), _(X,X), _(M,AP), _(M,AP), _(H,B), _(X,X), _(X,X), + /* 0B50 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(N,X), _(M,A), _(M,AP), + /* 0B58 */ _(X,X), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), + /* 0B60 */ _(V,C), _(V,C), _(M,AS), _(M,AS), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0B68 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0B70 */ _(X,X), _(C,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0B78 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* Tamil */ - /* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x), - /* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x), - /* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x), - /* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), - /* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), - /* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), - /* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L), - /* 0BC8 */ _(M,L), _(x,x), 
_(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x), - /* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), - /* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0B80 */ _(X,X), _(X,X),_(SM,SM), _(X,X), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0B88 */ _(V,C), _(V,C), _(V,C), _(X,X), _(X,X), _(X,X), _(V,C), _(V,C), + /* 0B90 */ _(V,C), _(X,X), _(V,C), _(V,C), _(V,C), _(C,C), _(X,X), _(X,X), + /* 0B98 */ _(X,X), _(C,C), _(C,C), _(X,X), _(C,C), _(X,X), _(C,C), _(C,C), + /* 0BA0 */ _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), _(X,X), _(X,X), _(X,X), + /* 0BA8 */ _(C,C), _(C,C), _(C,C), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), + /* 0BB0 */ _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0BB8 */ _(C,C), _(C,C), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AP), _(M,AP), + /* 0BC0 */ _(M,AS), _(M,AP), _(M,AP), _(X,X), _(X,X), _(X,X), _(M,LM), _(M,LM), + /* 0BC8 */ _(M,LM), _(X,X), _(M,AP), _(M,AP), _(M,AP), _(H,T), _(X,X), _(X,X), + /* 0BD0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AP), + /* 0BD8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0BE0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0BE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0BF0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0BF8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* Telugu */ - /* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x), - /* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), - /* 0C10 */ _(VI,x), _(x,x), _(VI,x), 
_(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,T), _(M,T), - /* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T), - /* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x), - /* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x), - /* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(CD,x), _(x,x), _(x,x), - /* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C00 */_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM), _(V,C), _(V,C), _(V,C), + /* 0C08 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(V,C), _(V,C), + /* 0C10 */ _(V,C), _(X,X), _(V,C), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0C18 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0C20 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0C28 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0C30 */ _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0C38 */ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(S,SM), _(M,BS), _(M,BS), + /* 0C40 */ _(M,BS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(M,BS), _(M,BS), + /* 0C48 */ _(M,BS), _(X,X), _(M,BS), _(M,BS), _(M,BS), _(H,T), _(X,X), _(X,X), + /* 0C50 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(M,BS), _(M,BS), _(X,X), + /* 0C58 */ _(C,C), _(C,C), _(C,C), _(X,X), _(X,X), _(C,C), _(X,X), 
_(X,X), + /* 0C60 */ _(V,C), _(V,C), _(M,BS), _(M,BS), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0C68 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0C70 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0C78 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* Kannada */ - /* 0C80 */ _(Bi,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), - /* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), - /* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), - /* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR), - /* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x), - /* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), - /* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(C,x), _(x,x), - /* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C80 */ _(GB,C),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0C88 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(V,C), _(V,C), + /* 0C90 */ _(V,C), _(X,X), _(V,C), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0C98 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0CA0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), 
_(C,C), + /* 0CA8 */ _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0CB0 */ _(R,C), _(C,C), _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), + /* 0CB8 */ _(C,C), _(C,C), _(X,X), _(X,X), _(N,X), _(S,SM), _(M,BS), _(M,BS), + /* 0CC0 */ _(M,BS), _(M,BS), _(M,BS), _(M,BS), _(M,BS), _(X,X), _(M,BS), _(M,BS), + /* 0CC8 */ _(M,BS), _(X,X), _(M,BS), _(M,BS), _(M,BS), _(H,T), _(X,X), _(X,X), + /* 0CD0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(M,BS), _(M,BS), _(X,X), + /* 0CD8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), _(X,X), + /* 0CE0 */ _(V,C), _(V,C), _(M,BS), _(M,BS), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0CE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0CF0 */ _(X,X), _(CS,C), _(CS,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0CF8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), /* Malayalam */ - /* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(Bi,x), _(VI,x), _(VI,x), _(VI,x), - /* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), - /* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), - /* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R), - /* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L), - /* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,T), _(x,x), - /* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R), - /* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), - /* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), 
_(Nd,x), _(Nd,x), _(Nd,x), - /* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), + /* 0D00 */_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM), _(GB,C), _(V,C), _(V,C), _(V,C), + /* 0D08 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(V,C), _(V,C), + /* 0D10 */ _(V,C), _(X,X), _(V,C), _(V,C), _(V,C), _(C,C), _(C,C), _(C,C), + /* 0D18 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0D20 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0D28 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0D30 */ _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0D38 */ _(C,C), _(C,C), _(C,C), _(M,AS), _(M,AS), _(S,SM), _(M,AP), _(M,AP), + /* 0D40 */ _(M,AP), _(M,AP), _(M,AP), _(M,AP), _(M,AP), _(X,X), _(M,LM), _(M,LM), + /* 0D48 */ _(M,LM), _(X,X), _(M,AP), _(M,AP), _(M,AP), _(H,T), _(Rf,X), _(X,X), + /* 0D50 */ _(X,X), _(X,X), _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(M,AP), + /* 0D58 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(V,C), + /* 0D60 */ _(V,C), _(V,C), _(M,AP), _(M,AP), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0D68 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0D70 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 0D78 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), /* Sinhala */ - /* 0D80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), - /* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), - /* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), 
_(C,x), _(C,x), _(C,x), _(C,x), - /* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), - /* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), - /* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), - /* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x), - /* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R), - /* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), - /* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0D80 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(V,C), _(V,C), _(V,C), + /* 0D88 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 0D90 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), + /* 0D98 */ _(X,X), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0DA0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0DA8 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0DB0 */ _(C,C), _(C,C), _(X,X), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 0DB8 */ _(C,C), _(C,C), _(C,C), _(R,C), _(X,X), _(C,C), _(X,X), _(X,X), + /* 0DC0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X), + /* 0DC8 */ _(X,X), _(X,X), _(H,T), _(X,X), _(X,X), _(X,X), _(X,X), _(M,AS), + /* 0DD0 */ _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(M,AS), _(X,X), _(M,AS), _(X,X), + /* 0DD8 */ _(M,AS), _(M,LM), _(M,AS), _(M,LM), _(M,AS), _(M,AS), _(M,AS), _(M,AS), + /* 0DE0 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), + /* 0DE8 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 0DF0 */ _(X,X), _(X,X), _(M,AS), _(M,AS), _(X,X), _(X,X), _(X,X), _(X,X), #define indic_offset_0x1000u 1336 /* Myanmar */ - /* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1008 */ _(C,x), 
_(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), - /* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B), - /* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R),_(CM,TBL), _(CM,B), _(CM,B), _(C,x), - /* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x), - /* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), - /* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B), - /* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R), - /* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x), - /* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x), - /* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R), - /* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R), - /* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x), + /* 1000 */ _(C,C), _(C,C), _(C,C), _(C,C), _(R,C), _(C,C), _(C,C), _(C,C), + /* 1008 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1010 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1018 */ _(C,C), _(C,C), _(C,C), _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1020 */ _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 1028 */ _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), _(VA,T), _(VA,T), _(VB,B), + /* 1030 */ _(VB,B), 
_(VL,L), _(A,SM), _(VA,T), _(VA,T), _(VA,T), _(A,SM), _(N,X), + /* 1038 */_(SM,SM), _(Co,X), _(As,X), _(MY,X), _(MR,X), _(MW,X), _(MH,X), _(C,C), + /* 1040 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 1048 */ _(GB,C), _(GB,C), _(P,X), _(P,X), _(X,X), _(X,X), _(C,C), _(X,X), + /* 1050 */ _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(VR,R), _(VR,R), + /* 1058 */ _(VB,B), _(VB,B), _(R,C), _(C,C), _(C,C), _(C,C), _(MY,X), _(MY,X), + /* 1060 */ _(ML,X), _(C,C), _(VR,R), _(PT,X), _(PT,X), _(C,C), _(C,C), _(VR,R), + /* 1068 */ _(VR,R), _(PT,X), _(PT,X), _(PT,X), _(PT,X), _(PT,X), _(C,C), _(C,C), + /* 1070 */ _(C,C), _(VA,T), _(VA,T), _(VA,T), _(VA,T), _(C,C), _(C,C), _(C,C), + /* 1078 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1080 */ _(C,C), _(C,C), _(MW,X), _(VR,R), _(VL,L), _(VA,T), _(VA,T),_(SM,SM), + /* 1088 */_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM),_(SM,SM), _(C,C),_(SM,SM), + /* 1090 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 1098 */ _(GB,C), _(GB,C),_(SM,SM),_(SM,SM),_(SM,SM), _(VA,T), _(X,X), _(X,X), #define indic_offset_0x1780u 1496 /* Khmer */ - /* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), - /* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T), - /* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR), - /* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R), - /* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T), - /* 17D0 */ _(SM,T), 
_(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x), - /* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x), - /* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 1780 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1788 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1790 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 1798 */ _(C,C), _(C,C), _(R,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* 17A0 */ _(C,C), _(C,C), _(C,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 17A8 */ _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), _(V,C), + /* 17B0 */ _(V,C), _(V,C), _(V,C), _(V,C), _(X,X), _(X,X), _(VR,R), _(VA,T), + /* 17B8 */ _(VA,T), _(VA,T), _(VA,T), _(VB,B), _(VB,B), _(VB,B), _(VA,T), _(VR,R), + /* 17C0 */ _(VR,R), _(VL,L), _(VL,L), _(VL,L), _(VR,R), _(VR,R), _(Xg,X), _(Yg,X), + /* 17C8 */ _(Yg,X), _(Rt,X), _(Rt,X), _(Xg,X), _(Rt,X), _(Xg,X), _(Xg,X), _(Xg,X), + /* 17D0 */ _(Xg,X), _(Xg,X), _(Co,X), _(Yg,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 17D8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(S,SM), _(Yg,X), _(X,X), _(X,X), + /* 17E0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* 17E8 */ _(GB,C), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), #define indic_offset_0x1cd0u 1608 /* Vedic Extensions */ - /* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B), - /* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B), - /* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), - /* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x), - /* 1CF0 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R), - /* 1CF8 */ _(Ca,x), _(Ca,x), _(CP,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 1CD0 */ _(A,SM), _(A,SM), 
_(A,SM), _(X,X), _(A,SM), _(A,SM), _(A,SM), _(A,SM), + /* 1CD8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), + /* 1CE0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), + /* 1CE8 */ _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(A,SM), _(S,SM), _(S,SM), + + /* No_Block */ + + /* 1CF0 */ _(S,SM), _(S,SM), _(C,C), _(C,C), _(A,SM), _(C,C), _(C,C), _(A,SM), + + /* Vedic Extensions */ + + /* 1CF8 */ _(A,SM), _(A,SM), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), #define indic_offset_0x2008u 1656 /* General Punctuation */ - /* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x), - /* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x), + /* 2008 */ _(X,X), _(X,X), _(X,X), _(X,X),_(ZWNJ,X),_(ZWJ,X), _(X,X), _(X,X), + /* 2010 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), _(X,X), + /* 2018 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), -#define indic_offset_0x2070u 1672 + /* No_Block */ + + /* 2020 */ _(X,X), _(X,X), _(GB,C), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + +#define indic_offset_0x2070u 1688 /* Superscripts and Subscripts */ - /* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x), - /* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), + /* 2070 */ _(X,X), _(X,X), _(X,X), _(X,X),_(SM,SM), _(X,X), _(X,X), _(X,X), + /* 2078 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 2080 */ _(X,X), _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), -#define indic_offset_0xa8e0u 1696 +#define indic_offset_0x25f8u 1712 + + + /* No_Block */ + + /* 25F8 */ _(X,X), _(X,X), _(X,X), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), + +#define indic_offset_0xa8e0u 1720 /* Devanagari Extended */ - /* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), - /* A8E8 */ _(Ca,T), 
_(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), - /* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x), - /* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T), + /* A8E0 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), + /* A8E8 */ _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), _(A,SM), + /* A8F0 */ _(A,SM), _(A,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), _(S,SM), + /* A8F8 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(V,C), _(M,AS), -#define indic_offset_0xa9e0u 1728 +#define indic_offset_0xa9e0u 1752 /* Myanmar Extended-B */ - /* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x), - /* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), - /* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), + /* A9E0 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(VA,T), _(X,X), _(C,C), + /* A9E8 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* A9F0 */ _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), _(GB,C), + /* A9F8 */ _(GB,C), _(GB,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(X,X), -#define indic_offset_0xaa60u 1760 +#define indic_offset_0xaa60u 1784 /* Myanmar Extended-A */ - /* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), - /* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), - /* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x), + /* AA60 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* AA68 */ _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), _(C,C), + /* AA70 */ _(X,X), _(C,C), _(C,C), _(C,C), _(GB,C), _(GB,C), _(GB,C), _(X,X), + /* AA78 */ _(X,X), _(X,X), _(C,C), _(PT,X), _(N,X), _(N,X), _(C,C), 
_(C,C), -}; /* Table items: 1792; occupancy: 71% */ +#define indic_offset_0xfe00u 1816 + + + /* No_Block */ + + /* FE00 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), + /* FE08 */ _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), _(VS,X), + +#define indic_offset_0x11300u 1832 + + /* 11300 */ _(X,X),_(SM,SM),_(SM,SM),_(SM,SM), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11308 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11310 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11318 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11320 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11328 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11330 */ _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), _(X,X), + /* 11338 */ _(X,X), _(X,X), _(X,X), _(N,X), _(N,X), _(X,X), _(X,X), _(X,X), + +}; /* Table items: 1896; occupancy: 69% */ uint16_t hb_indic_get_categories (hb_codepoint_t u) @@ -410,7 +443,7 @@ hb_indic_get_categories (hb_codepoint_t u) switch (u >> 12) { case 0x0u: - if (unlikely (u == 0x00A0u)) return _(CP,x); + if (unlikely (u == 0x00A0u)) return _(GB,C); if (hb_in_range (u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u]; if (hb_in_range (u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u]; if (hb_in_range (u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u]; @@ -423,9 +456,10 @@ hb_indic_get_categories (hb_codepoint_t u) break; case 0x2u: - if (unlikely (u == 0x25CCu)) return _(CP,x); - if (hb_in_range (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u]; + if (unlikely (u == 0x25CCu)) return _(DC,C); + if (hb_in_range (u, 0x2008u, 0x2027u)) return indic_table[u - 0x2008u + indic_offset_0x2008u]; if (hb_in_range (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u]; + if (hb_in_range 
(u, 0x25F8u, 0x25FFu)) return indic_table[u - 0x25F8u + indic_offset_0x25f8u]; break; case 0xAu: @@ -434,67 +468,70 @@ hb_indic_get_categories (hb_codepoint_t u) if (hb_in_range (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u]; break; + case 0xFu: + if (hb_in_range (u, 0xFE00u, 0xFE0Fu)) return indic_table[u - 0xFE00u + indic_offset_0xfe00u]; + break; + + case 0x11u: + if (hb_in_range (u, 0x11300u, 0x1133Fu)) return indic_table[u - 0x11300u + indic_offset_0x11300u]; + break; + default: break; } - return _(x,x); + return _(X,X); } #undef _ +#undef INDIC_COMBINE_CATEGORIES -#undef ISC_A -#undef ISC_Bi -#undef ISC_BJN -#undef ISC_Ca -#undef ISC_C -#undef ISC_CD -#undef ISC_CF -#undef ISC_CHL -#undef ISC_CIP -#undef ISC_CK -#undef ISC_CM -#undef ISC_CP -#undef ISC_CPR -#undef ISC_CPrf -#undef ISC_CS -#undef ISC_CSR -#undef ISC_CWS -#undef ISC_GM -#undef ISC_IS -#undef ISC_ZWJ -#undef ISC_ML -#undef ISC_ZWNJ -#undef ISC_N -#undef ISC_Nd -#undef ISC_NJ -#undef ISC_x -#undef ISC_PK -#undef ISC_RS -#undef ISC_SM -#undef ISC_TL -#undef ISC_TM -#undef ISC_V -#undef ISC_Vs -#undef ISC_Vo -#undef ISC_M -#undef ISC_VI +#undef _OT_A +#undef _OT_As +#undef _OT_C +#undef _OT_CM +#undef _OT_CS +#undef _OT_Co +#undef _OT_DC +#undef _OT_H +#undef _OT_M +#undef _OT_MH +#undef _OT_ML +#undef _OT_MR +#undef _OT_MW +#undef _OT_MY +#undef _OT_N +#undef _OT_P +#undef _OT_GB +#undef _OT_PT +#undef _OT_R +#undef _OT_Rf +#undef _OT_Rt +#undef _OT_SM +#undef _OT_S +#undef _OT_V +#undef _OT_VA +#undef _OT_VB +#undef _OT_VL +#undef _OT_VR +#undef _OT_VS +#undef _OT_X +#undef _OT_Xg +#undef _OT_Yg +#undef _OT_ZWJ +#undef _OT_ZWNJ -#undef IMC_B -#undef IMC_BL -#undef IMC_BR -#undef IMC_L -#undef IMC_LR -#undef IMC_x -#undef IMC_O -#undef IMC_R -#undef IMC_T -#undef IMC_TB -#undef IMC_TBL -#undef IMC_TBR -#undef IMC_TL -#undef IMC_TLR -#undef IMC_TR -#undef IMC_VOL +#undef _POS_T +#undef _POS_A +#undef _POS_AP +#undef _POS_AS +#undef _POS_C +#undef _POS_BS +#undef 
_POS_B +#undef _POS_X +#undef _POS_R +#undef _POS_L +#undef _POS_LM +#undef _POS_SM #endif diff --git a/src/hb-ot-shaper-indic.cc b/src/hb-ot-shaper-indic.cc index c239170ac..33f1ed996 100644 --- a/src/hb-ot-shaper-indic.cc +++ b/src/hb-ot-shaper-indic.cc @@ -39,6 +39,104 @@ */ +#define I_Check(C) static_assert (OT_##C == I_Cat(C), "") + +I_Check (C); +I_Check (V); +I_Check (N); +I_Check (H); +I_Check (ZWNJ); +I_Check (ZWJ); +I_Check (M); +I_Check (SM); +I_Check (A); +I_Check (VD); +I_Check (PLACEHOLDER); +I_Check (DOTTEDCIRCLE); +I_Check (RS); +I_Check (Repha); +I_Check (Ra); +I_Check (CM); +I_Check (Symbol); +I_Check (CS); + +#undef I_Check + +static inline void +set_indic_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + + info.indic_category() = (indic_category_t) (type & 0xFFu); + info.indic_position() = (indic_position_t) (type >> 8); +} + + +static inline bool +is_one_of (const hb_glyph_info_t &info, unsigned int flags) +{ + /* If it ligated, all bets are off. */ + if (_hb_glyph_info_ligated (&info)) return false; + return !!(FLAG_UNSAFE (info.indic_category()) & flags); +} + +/* Note: + * + * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels + * cannot happen in a consonant syllable. The plus side however is, we can call the + * consonant syllable logic from the vowel syllable function and get it all right! + * + * Keep in sync with consonant_categories in the generator. 
*/ +#define CONSONANT_FLAGS_INDIC (FLAG (I_Cat(C)) | FLAG (I_Cat(CS)) | FLAG (I_Cat(Ra)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(V)) | FLAG (I_Cat(PLACEHOLDER)) | FLAG (I_Cat(DOTTEDCIRCLE))) + +static inline bool +is_consonant (const hb_glyph_info_t &info) +{ + return is_one_of (info, CONSONANT_FLAGS_INDIC); +} + +#define JOINER_FLAGS (FLAG (I_Cat(ZWJ)) | FLAG (I_Cat(ZWNJ))) + +static inline bool +is_joiner (const hb_glyph_info_t &info) +{ + return is_one_of (info, JOINER_FLAGS); +} + +static inline bool +is_halant (const hb_glyph_info_t &info) +{ + return is_one_of (info, FLAG (I_Cat(H))); +} + +struct hb_indic_would_substitute_feature_t +{ + void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) + { + zero_context = zero_context_; + map->get_stage_lookups (0/*GSUB*/, + map->get_feature_stage (0/*GSUB*/, feature_tag), + &lookups, &count); + } + + bool would_substitute (const hb_codepoint_t *glyphs, + unsigned int glyphs_count, + hb_face_t *face) const + { + for (unsigned int i = 0; i < count; i++) + if (hb_ot_layout_lookup_would_substitute (face, lookups[i].index, glyphs, glyphs_count, zero_context)) + return true; + return false; + } + + private: + const hb_ot_map_t::lookup_map_t *lookups; + unsigned int count; + bool zero_context; +}; + + /* * Indic configurations. Note that we do not want to keep every single script-specific * behavior in these tables necessarily. This should mainly be used for per-script @@ -96,11 +194,6 @@ static const indic_config_t indic_configs[] = }; - -/* - * Indic shaper. - */ - static const hb_ot_map_feature_t indic_features[] = { @@ -368,7 +461,7 @@ compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) int a = pa->indic_position(); int b = pb->indic_position(); - return a < b ? -1 : a == b ? 
0 : +1; + return (int) a - (int) b; } @@ -417,9 +510,9 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, */ if (buffer->props.script == HB_SCRIPT_KANNADA && start + 3 <= end && - is_one_of (info[start ], FLAG (OT_Ra)) && - is_one_of (info[start+1], FLAG (OT_H)) && - is_one_of (info[start+2], FLAG (OT_ZWJ))) + is_one_of (info[start ], FLAG (I_Cat(Ra))) && + is_one_of (info[start+1], FLAG (I_Cat(H))) && + is_one_of (info[start+2], FLAG (I_Cat(ZWJ)))) { buffer->merge_clusters (start+1, start+3); hb_glyph_info_t tmp = info[start+1]; @@ -453,7 +546,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, start + 3 <= end && ( (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) || - (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ) + (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == I_Cat(ZWJ)) )) { /* See if it matches the 'rphf' feature. */ @@ -471,7 +564,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, base = start; has_reph = true; } - } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha) + } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == I_Cat(Repha)) { limit += 1; while (limit < end && is_joiner (info[limit])) @@ -523,8 +616,8 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, * search continues. This is particularly important for Bengali * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. 
*/ if (start < i && - info[i].indic_category() == OT_ZWJ && - info[i - 1].indic_category() == OT_H) + info[i].indic_category() == I_Cat(ZWJ) && + info[i - 1].indic_category() == I_Cat(H)) break; } } while (i > limit); @@ -546,7 +639,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, for (unsigned int i = limit; i < end; i++) if (is_consonant (info[i])) { - if (limit < i && info[i - 1].indic_category() == OT_ZWJ) + if (limit < i && info[i - 1].indic_category() == I_Cat(ZWJ)) break; else base = i; @@ -615,7 +708,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, /* Mark final consonants. A final consonant is one appearing after a matra. * Happens in Sinhala. */ for (unsigned int i = base + 1; i < end; i++) - if (info[i].indic_category() == OT_M) { + if (info[i].indic_category() == I_Cat(M)) { for (unsigned int j = i + 1; j < end; j++) if (is_consonant (info[j])) { info[j].indic_position() = POS_FINAL_C; @@ -660,14 +753,14 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, { bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA; for (unsigned int i = base + 1; i < end; i++) - if (info[i].indic_category() == OT_H) + if (info[i].indic_category() == I_Cat(H)) { unsigned int j; for (j = end - 1; j > i; j--) if (is_consonant (info[j]) || - (disallow_double_halants && info[j].indic_category() == OT_H)) + (disallow_double_halants && info[j].indic_category() == I_Cat(H))) break; - if (info[j].indic_category() != OT_H && j > i) { + if (info[j].indic_category() != I_Cat(H) && j > i) { /* Move Halant to after last consonant. 
*/ hb_glyph_info_t t = info[i]; memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); @@ -682,10 +775,10 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, indic_position_t last_pos = POS_START; for (unsigned int i = start; i < end; i++) { - if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H)))) + if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (I_Cat(N)) | FLAG (I_Cat(RS)) | FLAG (I_Cat(CM)) | FLAG (I_Cat(H))))) { info[i].indic_position() = last_pos; - if (unlikely (info[i].indic_category() == OT_H && + if (unlikely (info[i].indic_category() == I_Cat(H) && info[i].indic_position() == POS_PRE_M)) { /* @@ -719,7 +812,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, if (info[j].indic_position() < POS_SMVD) info[j].indic_position() = info[i].indic_position(); last = i; - } else if (info[i].indic_category() == OT_M) + } else if (info[i].indic_category() == I_Cat(M)) last = i; } @@ -850,10 +943,10 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 */ for (unsigned int i = start; i + 1 < base; i++) - if (info[i ].indic_category() == OT_Ra && - info[i+1].indic_category() == OT_H && + if (info[i ].indic_category() == I_Cat(Ra) && + info[i+1].indic_category() == I_Cat(H) && (i + 2 == base || - info[i+2].indic_category() != OT_ZWJ)) + info[i+2].indic_category() != I_Cat(ZWJ))) { info[i ].mask |= indic_plan->mask_array[INDIC_BLWF]; info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF]; @@ -880,7 +973,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, /* Apply ZWJ/ZWNJ effects */ for (unsigned int i = start + 1; i < end; i++) if (is_joiner (info[i])) { - bool non_joiner = info[i].indic_category() == OT_ZWNJ; + bool non_joiner = info[i].indic_category() == I_Cat(ZWNJ); unsigned int j = i; do { @@ -913,7 +1006,7 @@ 
initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, /* For dotted-circle, this is what Uniscribe does: * If dotted-circle is the last glyph, it just does nothing. * Ie. It doesn't form Reph. */ - if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) + if (buffer->info[end - 1].indic_category() == I_Cat(DOTTEDCIRCLE)) return; } @@ -956,8 +1049,8 @@ initial_reordering_indic (const hb_ot_shape_plan_t *plan, update_consonant_positions_indic (plan, font, buffer); hb_syllabic_insert_dotted_circles (font, buffer, indic_broken_cluster, - OT_DOTTEDCIRCLE, - OT_Repha, + I_Cat(DOTTEDCIRCLE), + I_Cat(Repha), POS_END); foreach_syllable (buffer, start, end) @@ -979,7 +1072,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, * and possibly multiple substitutions happened prior to this * phase, and that might have messed up our properties. Recover * from a particular case of that where we're fairly sure that a - * class of OT_H is desired but has been lost. */ + * class of I_Cat(H) is desired but has been lost. */ /* We don't call load_virama_glyph(), since we know it's already * loaded. */ hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed (); @@ -991,7 +1084,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, _hb_glyph_info_multiplied (&info[i])) { /* This will make sure that this glyph passes is_halant() test. 
*/ - info[i].indic_category() = OT_H; + info[i].indic_category() = I_Cat(H); _hb_glyph_info_clear_ligated_and_multiplied (&info[i]); } } @@ -1057,11 +1150,11 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, break; } if (base == end && start < base && - is_one_of (info[base - 1], FLAG (OT_ZWJ))) + is_one_of (info[base - 1], FLAG (I_Cat(ZWJ)))) base--; if (base < end) while (start < base && - is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_H)))) + is_one_of (info[base], (FLAG (I_Cat(N)) | FLAG (I_Cat(H))))) base--; @@ -1106,7 +1199,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, { search: while (new_pos > start && - !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H))))) + !(is_one_of (info[new_pos], (FLAG (I_Cat(M)) | FLAG (I_Cat(H)))))) new_pos--; /* If we found no Halant we are done. @@ -1123,7 +1216,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, if (new_pos + 1 < end) { /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */ - if (info[new_pos + 1].indic_category() == OT_ZWJ) + if (info[new_pos + 1].indic_category() == I_Cat(ZWJ)) { /* Keep searching. */ if (new_pos > start) @@ -1196,7 +1289,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, */ if (start + 1 < end && info[start].indic_position() == POS_RA_TO_BECOME_REPH && - ((info[start].indic_category() == OT_Repha) ^ + ((info[start].indic_category() == I_Cat(Repha)) ^ _hb_glyph_info_ligated_and_didnt_multiply (&info[start]))) { unsigned int new_reph_pos; @@ -1306,7 +1399,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, unlikely (is_halant (info[new_reph_pos]))) { for (unsigned int i = base + 1; i < new_reph_pos; i++) - if (info[i].indic_category() == OT_M) { + if (info[i].indic_category() == I_Cat(M)) { /* Ok, got it. 
*/ new_reph_pos--; } @@ -1366,7 +1459,7 @@ final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) { while (new_pos > start && - !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_H)))) + !(is_one_of (info[new_pos - 1], FLAG(I_Cat(M)) | FLAG (I_Cat(H))))) new_pos--; } diff --git a/src/hb-ot-shaper-indic.hh b/src/hb-ot-shaper-indic.hh index 439e46912..7e97dd385 100644 --- a/src/hb-ot-shaper-indic.hh +++ b/src/hb-ot-shaper-indic.hh @@ -40,9 +40,15 @@ /* Cateories used in the OpenType spec: * https://docs.microsoft.com/en-us/typography/script-development/devanagari */ -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. */ -enum indic_category_t { +/* Note: This enum is duplicated in the -machine.rl files. + * We can avoid that by defining this enum in terms of those in the + * indic-table.cc file, but I like this enum duplicated here, because + * this gives us a unified view of all the numbers. + * + * The equality of these and the duplicated numbers is checked by way + * of static_assert's in the respective .cc shaper files. Keep those + * in sync as well. */ +enum ot_category_t { OT_X = 0, OT_C = 1, OT_V = 2, @@ -57,34 +63,43 @@ enum indic_category_t { OT_PLACEHOLDER = 10, OT_DOTTEDCIRCLE = 11, OT_RS = 12, /* Register Shifter, used in Khmer OT spec. */ - OT_Coeng = 13, /* Khmer-style Virama. */ OT_Repha = 14, /* Atomically-encoded logical or visual repha. */ OT_Ra = 15, OT_CM = 16, /* Consonant-Medial. */ OT_Symbol = 17, /* Avagraha, etc that take marks (SM,A,VD). */ OT_CS = 18, - /* The following are used by Khmer & Myanmar shapers. Defined - * here for them to share. */ - OT_VAbv = 26, - OT_VBlw = 27, - OT_VPre = 28, - OT_VPst = 29, + /* Khmer & Myanmar shapers. */ + OT_VAbv = 20, + OT_VBlw = 21, + OT_VPre = 22, + OT_VPst = 23, + + /* Khmer.
*/ + OT_Coeng = OT_H, + OT_Robatic = 25, + OT_Xgroup = 26, + OT_Ygroup = 27, + + /* Myanmar */ + OT_IV = OT_V, + OT_As = 32, // Asat + OT_D = 33, // Digits except zero + OT_D0 = 34, // Digit zero + OT_DB = OT_N, // Dot below + OT_GB = OT_PLACEHOLDER, + OT_MH = 35, // Medial Ha + OT_MR = 36, // Medial Ra + OT_MW = 37, // Medial Wa, Shan Wa + OT_MY = 38, // Medial Ya, Mon Na, Mon Ma + OT_PT = 39, // Pwo and other tones + OT_VS = 40, // Variation selectors + OT_P = 41, // Punctuation + OT_ML = 42, // Medial Mon La }; -#define MEDIAL_FLAGS (FLAG (OT_CM)) - -/* Note: - * - * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels - * cannot happen in a consonant syllable. The plus side however is, we can call the - * consonant syllable logic from the vowel syllable function and get it all right! */ -#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CS) | FLAG (OT_Ra) | MEDIAL_FLAGS | FLAG (OT_V) | FLAG (OT_PLACEHOLDER) | FLAG (OT_DOTTEDCIRCLE)) -#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) - - /* Visual positions in a syllable from left to right. */ -enum indic_position_t { +enum ot_position_t { POS_START = 0, POS_RA_TO_BECOME_REPH = 1, @@ -110,322 +125,9 @@ enum indic_position_t { POS_END = 15 }; -/* Categories used in IndicSyllabicCategory.txt from UCD. */ -enum indic_syllabic_category_t { - INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, - - INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_Symbol, - INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, - INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER = OT_PLACEHOLDER, /* Don't care. */ - INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK = OT_A, - INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, - INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, - INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL = OT_CM, - INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, - INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER = OT_M, /* U+17CD only. 
*/ - INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, - INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_PLACEHOLDER, - INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha, - INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */ - INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM, - INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM, - INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS, - INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */ - INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng, - INDIC_SYLLABIC_CATEGORY_JOINER = OT_ZWJ, - INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, - INDIC_SYLLABIC_CATEGORY_NON_JOINER = OT_ZWNJ, - INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, - INDIC_SYLLABIC_CATEGORY_NUMBER = OT_PLACEHOLDER, - INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER = OT_PLACEHOLDER, /* Don't care. */ - INDIC_SYLLABIC_CATEGORY_PURE_KILLER = OT_M, /* Is like a vowel matra. */ - INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, - INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER = OT_SM, - INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, - INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, - INDIC_SYLLABIC_CATEGORY_VIRAMA = OT_H, - INDIC_SYLLABIC_CATEGORY_VISARGA = OT_SM, - INDIC_SYLLABIC_CATEGORY_VOWEL = OT_V, - INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT = OT_M, - INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT = OT_V -}; - -/* Categories used in IndicSMatraCategory.txt from UCD */ -enum indic_matra_category_t { - INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END, - - INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C, - INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C, - INDIC_MATRA_CATEGORY_BOTTOM = POS_BELOW_C, - INDIC_MATRA_CATEGORY_RIGHT = POS_POST_C, - - /* These should resolve to the position of the last part of the split sequence. 
*/ - INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, - INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, - INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM = INDIC_MATRA_CATEGORY_BOTTOM, - INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT = INDIC_MATRA_CATEGORY_BOTTOM, - INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, - INDIC_MATRA_CATEGORY_TOP_AND_LEFT = INDIC_MATRA_CATEGORY_TOP, - INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, - INDIC_MATRA_CATEGORY_TOP_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, - - INDIC_MATRA_CATEGORY_OVERSTRUCK = POS_AFTER_MAIN, - INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M -}; - -#define INDIC_COMBINE_CATEGORIES(S,M) \ - ( \ - static_assert_expr (S < 255 && M < 255) + \ - ( S | \ - ( \ - ( \ - S == INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL || \ - S == INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK || \ - S == INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER || \ - S == INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA || \ - S == INDIC_SYLLABIC_CATEGORY_VIRAMA || \ - S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT || \ - false \ - ? M : INDIC_MATRA_CATEGORY_NOT_APPLICABLE \ - ) << 8 \ - ) \ - ) \ - ) HB_INTERNAL uint16_t hb_indic_get_categories (hb_codepoint_t u); -static inline bool -is_one_of (const hb_glyph_info_t &info, unsigned int flags) -{ - /* If it ligated, all bets are off. 
*/ - if (_hb_glyph_info_ligated (&info)) return false; - return !!(FLAG_UNSAFE (info.indic_category()) & flags); -} - -static inline bool -is_joiner (const hb_glyph_info_t &info) -{ - return is_one_of (info, JOINER_FLAGS); -} - -static inline bool -is_consonant (const hb_glyph_info_t &info) -{ - return is_one_of (info, CONSONANT_FLAGS); -} - -static inline bool -is_halant (const hb_glyph_info_t &info) -{ - return is_one_of (info, FLAG (OT_H)); -} - -#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) - -#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) -#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) -#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) -#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) -#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) -#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) -#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) -#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) -#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) -#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) - - -#define MATRA_POS_LEFT(u) POS_PRE_M -#define MATRA_POS_RIGHT(u) ( \ - IS_DEVA(u) ? POS_AFTER_SUB : \ - IS_BENG(u) ? POS_AFTER_POST : \ - IS_GURU(u) ? POS_AFTER_POST : \ - IS_GUJR(u) ? POS_AFTER_POST : \ - IS_ORYA(u) ? POS_AFTER_POST : \ - IS_TAML(u) ? POS_AFTER_POST : \ - IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ - IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ - IS_MLYM(u) ? POS_AFTER_POST : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) -#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ - IS_DEVA(u) ? POS_AFTER_SUB : \ - IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ - IS_GUJR(u) ? POS_AFTER_SUB : \ - IS_ORYA(u) ? POS_AFTER_MAIN : \ - IS_TAML(u) ? POS_AFTER_SUB : \ - IS_TELU(u) ? POS_BEFORE_SUB : \ - IS_KNDA(u) ? POS_BEFORE_SUB : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) -#define MATRA_POS_BOTTOM(u) ( \ - IS_DEVA(u) ? 
POS_AFTER_SUB : \ - IS_BENG(u) ? POS_AFTER_SUB : \ - IS_GURU(u) ? POS_AFTER_POST : \ - IS_GUJR(u) ? POS_AFTER_POST : \ - IS_ORYA(u) ? POS_AFTER_SUB : \ - IS_TAML(u) ? POS_AFTER_POST : \ - IS_TELU(u) ? POS_BEFORE_SUB : \ - IS_KNDA(u) ? POS_BEFORE_SUB : \ - IS_MLYM(u) ? POS_AFTER_POST : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) - -static inline indic_position_t -matra_position_indic (hb_codepoint_t u, indic_position_t side) -{ - switch ((int) side) - { - case POS_PRE_C: return MATRA_POS_LEFT (u); - case POS_POST_C: return MATRA_POS_RIGHT (u); - case POS_ABOVE_C: return MATRA_POS_TOP (u); - case POS_BELOW_C: return MATRA_POS_BOTTOM (u); - } - return side; -} - -/* XXX - * This is a hack for now. We should move this data into the main Indic table. - * Or completely remove it and just check in the tables. - */ -static const hb_codepoint_t ra_chars[] = { - 0x0930u, /* Devanagari */ - 0x09B0u, /* Bengali */ - 0x09F0u, /* Bengali */ - 0x0A30u, /* Gurmukhi */ /* No Reph */ - 0x0AB0u, /* Gujarati */ - 0x0B30u, /* Oriya */ - 0x0BB0u, /* Tamil */ /* No Reph */ - 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ - 0x0CB0u, /* Kannada */ - 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ - - 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ -}; - -static inline bool -is_ra (hb_codepoint_t u) -{ - return hb_array (ra_chars).lfind (u); -} - -static inline void -set_indic_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - indic_category_t cat = (indic_category_t) (type & 0xFFu); - indic_position_t pos = (indic_position_t) (type >> 8); - - - /* - * Re-assign category - */ - - /* The following act more like the Bindus. */ - if (unlikely (hb_in_range (u, 0x0953u, 0x0954u))) - cat = OT_SM; - /* The following act like consonants. 
*/ - else if (unlikely (hb_in_ranges (u, 0x0A72u, 0x0A73u, - 0x1CF5u, 0x1CF6u))) - cat = OT_C; - /* TODO: The following should only be allowed after a Visarga. - * For now, just treat them like regular tone marks. */ - else if (unlikely (hb_in_range (u, 0x1CE2u, 0x1CE8u))) - cat = OT_A; - /* TODO: The following should only be allowed after some of - * the nasalization marks, maybe only for U+1CE9..U+1CF1. - * For now, just treat them like tone marks. */ - else if (unlikely (u == 0x1CEDu)) - cat = OT_A; - /* The following take marks in standalone clusters, similar to Avagraha. */ - else if (unlikely (hb_in_ranges (u, 0xA8F2u, 0xA8F7u, - 0x1CE9u, 0x1CECu, - 0x1CEEu, 0x1CF1u))) - { - cat = OT_Symbol; - static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); - } - else if (unlikely (u == 0x0A51u)) - { - /* https://github.com/harfbuzz/harfbuzz/issues/524 */ - cat = OT_M; - pos = POS_BELOW_C; - } - - /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, - * so the Indic shaper needs to know their categories. 
*/ - else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; - else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N; - - else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ - else if (unlikely (u == 0x0B55u)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/2849 */ - - else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ - else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */ - else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ - else if (unlikely (u == 0x0D04u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/3511 */ - else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) - cat = OT_PLACEHOLDER; - else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; - - - /* - * Re-assign position. - */ - - if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) - { - pos = POS_BASE_C; - if (is_ra (u)) - cat = OT_Ra; - } - else if (cat == OT_M) - { - pos = matra_position_indic (u, pos); - } - else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_VD) | FLAG (OT_A) | FLAG (OT_Symbol)))) - { - pos = POS_SMVD; - } - - if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. 
*/ - - - - info.indic_category() = cat; - info.indic_position() = pos; -} - -struct hb_indic_would_substitute_feature_t -{ - void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) - { - zero_context = zero_context_; - map->get_stage_lookups (0/*GSUB*/, - map->get_feature_stage (0/*GSUB*/, feature_tag), - &lookups, &count); - } - - bool would_substitute (const hb_codepoint_t *glyphs, - unsigned int glyphs_count, - hb_face_t *face) const - { - for (unsigned int i = 0; i < count; i++) - if (hb_ot_layout_lookup_would_substitute (face, lookups[i].index, glyphs, glyphs_count, zero_context)) - return true; - return false; - } - - private: - const hb_ot_map_t::lookup_map_t *lookups; - unsigned int count; - bool zero_context; -}; - - #endif /* HB_OT_SHAPER_INDIC_HH */ diff --git a/src/hb-ot-shaper-khmer-machine.hh b/src/hb-ot-shaper-khmer-machine.hh index 8d2a64c31..cf136121c 100644 --- a/src/hb-ot-shaper-khmer-machine.hh +++ b/src/hb-ot-shaper-khmer-machine.hh @@ -31,6 +31,16 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define khmer_category() indic_category() /* khmer_category_t */ + +using khmer_category_t = ot_category_t; + +#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat + enum khmer_syllable_type_t { khmer_consonant_syllable, khmer_broken_cluster, @@ -38,174 +48,180 @@ enum khmer_syllable_type_t { }; -#line 42 "hb-ot-shaper-khmer-machine.hh" +#line 52 "hb-ot-shaper-khmer-machine.hh" #define khmer_syllable_machine_ex_C 1u -#define khmer_syllable_machine_ex_Coeng 13u +#define khmer_syllable_machine_ex_Coeng 4u #define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u #define khmer_syllable_machine_ex_PLACEHOLDER 10u #define khmer_syllable_machine_ex_Ra 15u -#define khmer_syllable_machine_ex_Robatic 20u +#define khmer_syllable_machine_ex_Robatic 25u #define khmer_syllable_machine_ex_V 2u -#define khmer_syllable_machine_ex_VAbv 26u -#define khmer_syllable_machine_ex_VBlw 27u -#define 
khmer_syllable_machine_ex_VPre 28u -#define khmer_syllable_machine_ex_VPst 29u -#define khmer_syllable_machine_ex_Xgroup 21u -#define khmer_syllable_machine_ex_Ygroup 22u +#define khmer_syllable_machine_ex_VAbv 20u +#define khmer_syllable_machine_ex_VBlw 21u +#define khmer_syllable_machine_ex_VPre 22u +#define khmer_syllable_machine_ex_VPst 23u +#define khmer_syllable_machine_ex_Xgroup 26u +#define khmer_syllable_machine_ex_Ygroup 27u #define khmer_syllable_machine_ex_ZWJ 6u #define khmer_syllable_machine_ex_ZWNJ 5u -#line 60 "hb-ot-shaper-khmer-machine.hh" +#line 70 "hb-ot-shaper-khmer-machine.hh" static const unsigned char _khmer_syllable_machine_trans_keys[] = { - 5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, 5u, 21u, - 5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, - 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 29u, 5u, 29u, 5u, 29u, 5u, 29u, - 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 1u, 15u, 5u, 26u, 5u, 29u, - 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 15u, 5u, 29u, 5u, 29u, + 5u, 26u, 5u, 26u, 1u, 15u, 5u, 26u, 5u, 26u, 5u, 26u, 5u, 26u, 5u, 26u, + 5u, 26u, 5u, 26u, 5u, 26u, 5u, 26u, 1u, 15u, 5u, 26u, 5u, 26u, 5u, 26u, + 5u, 26u, 5u, 26u, 5u, 26u, 5u, 26u, 1u, 27u, 4u, 27u, 1u, 15u, 4u, 27u, + 27u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 1u, 15u, 4u, 27u, + 4u, 27u, 27u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 4u, 27u, 5u, 26u, 0 }; static const char _khmer_syllable_machine_key_spans[] = { - 22, 17, 22, 17, 15, 17, 22, 17, - 22, 17, 17, 22, 17, 15, 17, 22, - 17, 22, 17, 22, 29, 25, 25, 25, - 1, 18, 25, 25, 25, 15, 22, 25, - 25, 1, 18, 25, 25, 15, 25, 25 + 22, 22, 15, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 15, 22, 22, 22, + 22, 22, 22, 22, 27, 24, 15, 24, + 1, 24, 24, 24, 24, 24, 15, 24, + 24, 1, 24, 24, 24, 24, 24, 22 }; static const short _khmer_syllable_machine_index_offsets[] = { - 0, 23, 41, 64, 82, 98, 116, 139, - 157, 180, 198, 216, 239, 257, 273, 291, - 314, 332, 355, 373, 396, 426, 452, 478, 
- 504, 506, 525, 551, 577, 603, 619, 642, - 668, 694, 696, 715, 741, 767, 783, 809 + 0, 23, 46, 62, 85, 108, 131, 154, + 177, 200, 223, 246, 269, 285, 308, 331, + 354, 377, 400, 423, 446, 474, 499, 515, + 540, 542, 567, 592, 617, 642, 667, 683, + 708, 733, 735, 760, 785, 810, 835, 860 }; static const char _khmer_syllable_machine_indicies[] = { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, - 3, 0, 0, 0, 0, 4, 0, 1, + 0, 0, 0, 0, 3, 4, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 3, - 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 3, 0, 0, 0, 0, 4, 0, - 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, - 4, 0, 6, 6, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 0, 7, 7, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 0, 4, 0, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 8, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 10, 0, 0, 0, - 0, 4, 0, 9, 9, 0, 0, 0, + 0, 0, 2, 0, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 10, 0, 11, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 12, 0, 0, - 0, 0, 4, 0, 11, 11, 0, 0, + 0, 8, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 12, 0, 14, 14, - 13, 13, 13, 13, 13, 13, 13, 13, + 0, 0, 2, 0, 0, 0, 0, 0, + 10, 0, 9, 9, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 10, + 0, 11, 11, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 0, 12, 0, + 11, 11, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 12, 0, 14, + 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 13, - 14, 14, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 15, 16, 16, 16, 16, 17, 16, 18, - 18, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 17, - 16, 19, 19, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 19, - 16, 20, 20, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 16, 21, 16, 22, 22, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 23, 16, 16, 16, 16, - 17, 16, 22, 22, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 
16, 16, 16, - 16, 16, 23, 16, 24, 24, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 25, 16, 16, 16, - 16, 17, 16, 24, 24, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 25, 16, 14, 14, 16, - 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 26, 15, 16, 16, - 16, 16, 17, 16, 28, 28, 27, 27, - 29, 29, 27, 27, 27, 2, 2, 27, - 30, 27, 28, 27, 27, 27, 27, 27, - 15, 19, 27, 27, 27, 17, 23, 25, - 21, 27, 32, 32, 31, 31, 31, 31, - 31, 31, 33, 31, 31, 31, 31, 31, - 31, 2, 3, 6, 31, 31, 31, 4, - 10, 12, 8, 31, 34, 34, 31, 31, - 31, 31, 31, 31, 35, 31, 31, 31, - 31, 31, 31, 31, 3, 6, 31, 31, - 31, 4, 10, 12, 8, 31, 5, 5, - 31, 31, 31, 31, 31, 31, 35, 31, - 31, 31, 31, 31, 31, 31, 4, 6, - 31, 31, 31, 31, 31, 31, 8, 31, - 6, 31, 7, 7, 31, 31, 31, 31, - 31, 31, 35, 31, 31, 31, 31, 31, - 31, 31, 8, 6, 31, 36, 36, 31, - 31, 31, 31, 31, 31, 35, 31, 31, - 31, 31, 31, 31, 31, 10, 6, 31, - 31, 31, 4, 31, 31, 8, 31, 37, - 37, 31, 31, 31, 31, 31, 31, 35, - 31, 31, 31, 31, 31, 31, 31, 12, - 6, 31, 31, 31, 4, 10, 31, 8, - 31, 34, 34, 31, 31, 31, 31, 31, - 31, 33, 31, 31, 31, 31, 31, 31, - 31, 3, 6, 31, 31, 31, 4, 10, - 12, 8, 31, 28, 28, 31, 31, 31, + 13, 13, 13, 16, 17, 13, 14, 14, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 17, 18, 19, 19, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 19, 13, 14, 14, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 15, 13, 13, 13, + 13, 13, 17, 13, 20, 20, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 15, 13, 21, 21, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 22, 13, 23, 23, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 15, 13, 13, 13, 13, 13, 24, + 13, 23, 23, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 24, 13, + 25, 25, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 15, + 13, 13, 13, 13, 13, 26, 13, 25, + 25, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 
13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 26, 13, 28, 28, + 27, 29, 30, 30, 27, 27, 27, 3, + 3, 27, 27, 27, 28, 27, 27, 27, + 27, 15, 24, 26, 22, 27, 27, 17, + 19, 27, 32, 33, 33, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, - 31, 28, 31, 14, 14, 38, 38, 38, + 31, 31, 2, 10, 12, 8, 31, 3, + 4, 5, 31, 28, 28, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 28, 31, 34, 35, 35, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 2, 10, 12, 8, 31, + 31, 4, 5, 31, 5, 31, 34, 6, + 6, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 8, 31, 31, 2, 5, 31, 34, + 7, 7, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 8, 5, 31, + 34, 36, 36, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 2, 31, 31, 8, 31, 31, 10, 5, + 31, 34, 37, 37, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 2, 10, 31, 8, 31, 31, 12, + 5, 31, 32, 35, 35, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 2, 10, 12, 8, 31, 31, + 4, 5, 31, 39, 39, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, - 38, 38, 38, 15, 38, 38, 38, 38, - 17, 38, 40, 40, 39, 39, 39, 39, - 39, 39, 41, 39, 39, 39, 39, 39, - 39, 39, 15, 19, 39, 39, 39, 17, - 23, 25, 21, 39, 18, 18, 39, 39, - 39, 39, 39, 39, 41, 39, 39, 39, - 39, 39, 39, 39, 17, 19, 39, 39, - 39, 39, 39, 39, 21, 39, 19, 39, - 20, 20, 39, 39, 39, 39, 39, 39, - 41, 39, 39, 39, 39, 39, 39, 39, - 21, 19, 39, 42, 42, 39, 39, 39, - 39, 39, 39, 41, 39, 39, 39, 39, - 39, 39, 39, 23, 19, 39, 39, 39, - 17, 39, 39, 21, 39, 43, 43, 39, - 39, 39, 39, 39, 39, 41, 39, 39, - 39, 39, 39, 39, 39, 25, 19, 39, - 39, 39, 17, 23, 39, 21, 39, 44, - 44, 39, 39, 39, 39, 39, 39, 39, - 39, 39, 39, 39, 39, 44, 39, 45, - 45, 39, 39, 39, 39, 39, 39, 30, - 39, 39, 39, 39, 39, 39, 26, 15, - 19, 39, 39, 39, 17, 23, 25, 21, - 39, 40, 40, 39, 39, 39, 39, 39, - 39, 30, 39, 39, 39, 39, 39, 39, - 39, 15, 19, 39, 39, 39, 17, 23, - 25, 21, 39, 0 + 38, 39, 38, 29, 40, 40, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 15, 24, 
26, 22, 38, + 16, 17, 19, 38, 41, 42, 42, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 15, 24, 26, 22, + 38, 38, 17, 19, 38, 19, 38, 41, + 20, 20, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 22, 38, 38, 15, 19, 38, + 41, 21, 21, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 22, 19, + 38, 41, 43, 43, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 15, 38, 38, 22, 38, 38, 24, + 19, 38, 41, 44, 44, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 15, 24, 38, 22, 38, 38, + 26, 19, 38, 29, 42, 42, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 15, 24, 26, 22, 38, + 38, 17, 19, 38, 14, 14, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 15, 45, 45, 45, 45, + 45, 17, 45, 0 }; static const char _khmer_syllable_machine_trans_targs[] = { - 20, 1, 28, 22, 23, 3, 24, 5, - 25, 7, 26, 9, 27, 20, 10, 31, - 20, 32, 12, 33, 14, 34, 16, 35, - 18, 36, 39, 20, 21, 30, 37, 20, - 0, 29, 2, 4, 6, 8, 20, 20, - 11, 13, 15, 17, 38, 19 + 20, 1, 25, 29, 23, 24, 4, 5, + 26, 7, 27, 9, 28, 20, 11, 34, + 38, 32, 20, 33, 14, 15, 35, 17, + 36, 19, 37, 20, 21, 30, 39, 20, + 22, 0, 2, 3, 6, 8, 20, 31, + 10, 12, 13, 16, 18, 20 }; static const char _khmer_syllable_machine_trans_actions[] = { 1, 0, 2, 2, 2, 0, 0, 0, - 2, 0, 2, 0, 2, 3, 0, 4, - 5, 2, 0, 0, 0, 2, 0, 2, - 0, 2, 4, 8, 2, 9, 0, 10, - 0, 0, 0, 0, 0, 0, 11, 12, - 0, 0, 0, 0, 4, 0 + 2, 0, 2, 0, 2, 3, 0, 2, + 4, 4, 5, 0, 0, 0, 2, 0, + 2, 0, 2, 8, 2, 0, 9, 10, + 0, 0, 0, 0, 0, 0, 11, 4, + 0, 0, 0, 0, 0, 12 }; static const char _khmer_syllable_machine_to_state_actions[] = { @@ -226,10 +242,10 @@ static const char _khmer_syllable_machine_from_state_actions[] = { static const unsigned char _khmer_syllable_machine_eof_trans[] = { 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 14, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 0, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 39, 40, - 40, 40, 40, 40, 40, 40, 40, 40 + 1, 1, 14, 19, 14, 14, 14, 14, + 14, 14, 14, 14, 0, 32, 32, 32, + 
32, 32, 32, 32, 32, 32, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 46 }; static const int khmer_syllable_machine_start = 20; @@ -239,11 +255,11 @@ static const int khmer_syllable_machine_error = -1; static const int khmer_syllable_machine_en_main = 20; -#line 43 "hb-ot-shaper-khmer-machine.rl" +#line 53 "hb-ot-shaper-khmer-machine.rl" -#line 86 "hb-ot-shaper-khmer-machine.rl" +#line 102 "hb-ot-shaper-khmer-machine.rl" #define found_syllable(syllable_type) \ @@ -262,7 +278,7 @@ find_syllables_khmer (hb_buffer_t *buffer) int cs; hb_glyph_info_t *info = buffer->info; -#line 266 "hb-ot-shaper-khmer-machine.hh" +#line 282 "hb-ot-shaper-khmer-machine.hh" { cs = khmer_syllable_machine_start; ts = 0; @@ -270,7 +286,7 @@ find_syllables_khmer (hb_buffer_t *buffer) act = 0; } -#line 106 "hb-ot-shaper-khmer-machine.rl" +#line 122 "hb-ot-shaper-khmer-machine.rl" p = 0; @@ -278,7 +294,7 @@ find_syllables_khmer (hb_buffer_t *buffer) unsigned int syllable_serial = 1; -#line 282 "hb-ot-shaper-khmer-machine.hh" +#line 298 "hb-ot-shaper-khmer-machine.hh" { int _slen; int _trans; @@ -292,7 +308,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 296 "hb-ot-shaper-khmer-machine.hh" +#line 312 "hb-ot-shaper-khmer-machine.hh" } _keys = _khmer_syllable_machine_trans_keys + (cs<<1); @@ -315,30 +331,30 @@ _eof_trans: {te = p+1;} break; case 8: -#line 82 "hb-ot-shaper-khmer-machine.rl" +#line 98 "hb-ot-shaper-khmer-machine.rl" {te = p+1;{ found_syllable (khmer_non_khmer_cluster); }} break; case 10: -#line 80 "hb-ot-shaper-khmer-machine.rl" +#line 96 "hb-ot-shaper-khmer-machine.rl" {te = p;p--;{ found_syllable (khmer_consonant_syllable); }} break; - case 12: -#line 81 "hb-ot-shaper-khmer-machine.rl" + case 11: +#line 97 "hb-ot-shaper-khmer-machine.rl" {te = p;p--;{ found_syllable (khmer_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; - case 11: -#line 82 "hb-ot-shaper-khmer-machine.rl" + case 12: +#line 98 "hb-ot-shaper-khmer-machine.rl" {te = 
p;p--;{ found_syllable (khmer_non_khmer_cluster); }} break; case 1: -#line 80 "hb-ot-shaper-khmer-machine.rl" +#line 96 "hb-ot-shaper-khmer-machine.rl" {{p = ((te))-1;}{ found_syllable (khmer_consonant_syllable); }} break; - case 5: -#line 81 "hb-ot-shaper-khmer-machine.rl" + case 3: +#line 97 "hb-ot-shaper-khmer-machine.rl" {{p = ((te))-1;}{ found_syllable (khmer_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; - case 3: + case 5: #line 1 "NONE" { switch( act ) { case 2: @@ -353,16 +369,16 @@ _eof_trans: case 4: #line 1 "NONE" {te = p+1;} -#line 81 "hb-ot-shaper-khmer-machine.rl" +#line 97 "hb-ot-shaper-khmer-machine.rl" {act = 2;} break; case 9: #line 1 "NONE" {te = p+1;} -#line 82 "hb-ot-shaper-khmer-machine.rl" +#line 98 "hb-ot-shaper-khmer-machine.rl" {act = 3;} break; -#line 366 "hb-ot-shaper-khmer-machine.hh" +#line 382 "hb-ot-shaper-khmer-machine.hh" } _again: @@ -371,7 +387,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 375 "hb-ot-shaper-khmer-machine.hh" +#line 391 "hb-ot-shaper-khmer-machine.hh" } if ( ++p != pe ) @@ -387,7 +403,7 @@ _again: } -#line 114 "hb-ot-shaper-khmer-machine.rl" +#line 130 "hb-ot-shaper-khmer-machine.rl" } diff --git a/src/hb-ot-shaper-khmer-machine.rl b/src/hb-ot-shaper-khmer-machine.rl index b343f89f6..1ebf586f0 100644 --- a/src/hb-ot-shaper-khmer-machine.rl +++ b/src/hb-ot-shaper-khmer-machine.rl @@ -29,6 +29,16 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define khmer_category() indic_category() /* khmer_category_t */ + +using khmer_category_t = ot_category_t; + +#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat + enum khmer_syllable_type_t { khmer_consonant_syllable, khmer_broken_cluster, @@ -44,21 +54,27 @@ enum khmer_syllable_type_t { %%{ + +# These values are replicated from indic.hh, and relisted in khmer.cc; keep in sync. 
+ export C = 1; export V = 2; export ZWNJ = 5; export ZWJ = 6; export PLACEHOLDER = 10; export DOTTEDCIRCLE = 11; -export Coeng= 13; export Ra = 15; -export Robatic = 20; -export Xgroup = 21; -export Ygroup = 22; -export VAbv = 26; -export VBlw = 27; -export VPre = 28; -export VPst = 29; + +export VAbv = 20; +export VBlw = 21; +export VPre = 22; +export VPst = 23; + +export Coeng = 4; +export Robatic = 25; +export Xgroup = 26; +export Ygroup = 27; + c = (C | Ra | V); cn = c.((ZWJ|ZWNJ)?.Robatic)?; diff --git a/src/hb-ot-shaper-khmer.cc b/src/hb-ot-shaper-khmer.cc index a7c35ad10..2e31aa039 100644 --- a/src/hb-ot-shaper-khmer.cc +++ b/src/hb-ot-shaper-khmer.cc @@ -28,8 +28,8 @@ #ifndef HB_NO_OT_SHAPE -#include "hb-ot-shaper-khmer.hh" #include "hb-ot-shaper-khmer-machine.hh" +#include "hb-ot-shaper-indic.hh" #include "hb-ot-layout.hh" @@ -37,6 +37,30 @@ * Khmer shaper. */ + +#define K_Check(C) static_assert (OT_##C == K_Cat(C), "") + +K_Check (C); +K_Check (V); +K_Check (ZWNJ); +K_Check (ZWJ); +K_Check (PLACEHOLDER); +K_Check (DOTTEDCIRCLE); +K_Check (Ra); + +K_Check (VAbv); +K_Check (VBlw); +K_Check (VPre); +K_Check (VPst); + +K_Check (Coeng); +K_Check (Robatic); +K_Check (Xgroup); +K_Check (Ygroup); + +#undef K_Check + + static const hb_ot_map_feature_t khmer_features[] = { @@ -79,6 +103,15 @@ enum { KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */ }; +static inline void +set_khmer_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + + info.khmer_category() = (khmer_category_t) (type & 0xFFu); +} + static void setup_syllables_khmer (const hb_ot_shape_plan_t *plan, hb_font_t *font, @@ -231,11 +264,11 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, * the 'pref' OpenType feature applied to them. 
* """ */ - if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end) + if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end) { num_coengs++; - if (info[i + 1].khmer_category() == OT_Ra) + if (info[i + 1].khmer_category() == K_Cat(Ra)) { for (unsigned int j = 0; j < 2; j++) info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; @@ -263,7 +296,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, } /* Reorder left matra piece. */ - else if (info[i].khmer_category() == OT_VPre) + else if (info[i].khmer_category() == K_Cat(VPre)) { /* Move to the start. */ buffer->merge_clusters (start, i + 1); @@ -302,8 +335,8 @@ reorder_khmer (const hb_ot_shape_plan_t *plan, { hb_syllabic_insert_dotted_circles (font, buffer, khmer_broken_cluster, - OT_DOTTEDCIRCLE, - OT_Repha); + K_Cat(DOTTEDCIRCLE), + (unsigned) -1); foreach_syllable (buffer, start, end) reorder_syllable_khmer (plan, font->face, buffer, start, end); diff --git a/src/hb-ot-shaper-khmer.hh b/src/hb-ot-shaper-khmer.hh deleted file mode 100644 index e3d0d3215..000000000 --- a/src/hb-ot-shaper-khmer.hh +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright © 2018 Google, Inc. - * - * This is part of HarfBuzz, a text shaping library. - * - * Permission is hereby granted, without written agreement and without - * license or royalty fees, to use, copy, modify, and distribute this - * software and its documentation for any purpose, provided that the - * above copyright notice and the following two paragraphs appear in - * all copies of this software. - * - * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN - * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. 
- * - * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - * - * Google Author(s): Behdad Esfahbod - */ - -#ifndef HB_OT_SHAPER_KHMER_HH -#define HB_OT_SHAPER_KHMER_HH - -#include "hb.hh" - -#include "hb-ot-shaper-indic.hh" - - -/* buffer var allocations */ -#define khmer_category() indic_category() /* khmer_category_t */ - - -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. */ -enum khmer_category_t -{ - OT_Robatic = 20, - OT_Xgroup = 21, - OT_Ygroup = 22, - //OT_VAbv = 26, - //OT_VBlw = 27, - //OT_VPre = 28, - //OT_VPst = 29, -}; - -using khmer_position_t = indic_position_t; - -static inline void -set_khmer_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - khmer_category_t cat = (khmer_category_t) (type & 0xFFu); - khmer_position_t pos = (khmer_position_t) (type >> 8); - - - /* - * Re-assign category - * - * These categories are experimentally extracted from what Uniscribe allows. - */ - switch (u) - { - case 0x179Au: - cat = (khmer_category_t) OT_Ra; - break; - - case 0x17CCu: - case 0x17C9u: - case 0x17CAu: - cat = OT_Robatic; - break; - - case 0x17C6u: - case 0x17CBu: - case 0x17CDu: - case 0x17CEu: - case 0x17CFu: - case 0x17D0u: - case 0x17D1u: - cat = OT_Xgroup; - break; - - case 0x17C7u: - case 0x17C8u: - case 0x17DDu: - case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */ - cat = OT_Ygroup; - break; - } - - /* - * Re-assign position. 
- */ - if (cat == (khmer_category_t) OT_M) - switch ((int) pos) - { - case POS_PRE_C: cat = (khmer_category_t) OT_VPre; break; - case POS_BELOW_C: cat = (khmer_category_t) OT_VBlw; break; - case POS_ABOVE_C: cat = (khmer_category_t) OT_VAbv; break; - case POS_POST_C: cat = (khmer_category_t) OT_VPst; break; - default: assert (0); - } - - info.khmer_category() = cat; -} - - -#endif /* HB_OT_SHAPER_KHMER_HH */ diff --git a/src/hb-ot-shaper-myanmar-machine.hh b/src/hb-ot-shaper-myanmar-machine.hh index a211a94d7..628e2e7fa 100644 --- a/src/hb-ot-shaper-myanmar-machine.hh +++ b/src/hb-ot-shaper-myanmar-machine.hh @@ -31,6 +31,18 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + +using myanmar_category_t = ot_category_t; +using myanmar_position_t = ot_position_t; + +#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat + enum myanmar_syllable_type_t { myanmar_consonant_syllable, myanmar_punctuation_cluster, @@ -39,267 +51,336 @@ enum myanmar_syllable_type_t { }; -#line 43 "hb-ot-shaper-myanmar-machine.hh" +#line 55 "hb-ot-shaper-myanmar-machine.hh" #define myanmar_syllable_machine_ex_A 9u -#define myanmar_syllable_machine_ex_As 18u +#define myanmar_syllable_machine_ex_As 32u #define myanmar_syllable_machine_ex_C 1u -#define myanmar_syllable_machine_ex_CS 19u -#define myanmar_syllable_machine_ex_D 10u -#define myanmar_syllable_machine_ex_D0 20u +#define myanmar_syllable_machine_ex_CS 18u +#define myanmar_syllable_machine_ex_D 33u +#define myanmar_syllable_machine_ex_D0 34u #define myanmar_syllable_machine_ex_DB 3u +#define myanmar_syllable_machine_ex_DOTTEDCIRCLE 11u #define myanmar_syllable_machine_ex_GB 10u #define myanmar_syllable_machine_ex_H 4u #define myanmar_syllable_machine_ex_IV 2u -#define myanmar_syllable_machine_ex_MH 21u -#define 
myanmar_syllable_machine_ex_ML 32u -#define myanmar_syllable_machine_ex_MR 22u -#define myanmar_syllable_machine_ex_MW 23u -#define myanmar_syllable_machine_ex_MY 24u -#define myanmar_syllable_machine_ex_P 31u -#define myanmar_syllable_machine_ex_PT 25u +#define myanmar_syllable_machine_ex_MH 35u +#define myanmar_syllable_machine_ex_ML 42u +#define myanmar_syllable_machine_ex_MR 36u +#define myanmar_syllable_machine_ex_MW 37u +#define myanmar_syllable_machine_ex_MY 38u +#define myanmar_syllable_machine_ex_P 41u +#define myanmar_syllable_machine_ex_PT 39u #define myanmar_syllable_machine_ex_Ra 15u -#define myanmar_syllable_machine_ex_V 8u -#define myanmar_syllable_machine_ex_VAbv 26u -#define myanmar_syllable_machine_ex_VBlw 27u -#define myanmar_syllable_machine_ex_VPre 28u -#define myanmar_syllable_machine_ex_VPst 29u -#define myanmar_syllable_machine_ex_VS 30u +#define myanmar_syllable_machine_ex_SM 8u +#define myanmar_syllable_machine_ex_VAbv 20u +#define myanmar_syllable_machine_ex_VBlw 21u +#define myanmar_syllable_machine_ex_VPre 22u +#define myanmar_syllable_machine_ex_VPst 23u +#define myanmar_syllable_machine_ex_VS 40u #define myanmar_syllable_machine_ex_ZWJ 6u #define myanmar_syllable_machine_ex_ZWNJ 5u -#line 72 "hb-ot-shaper-myanmar-machine.hh" +#line 85 "hb-ot-shaper-myanmar-machine.hh" static const unsigned char _myanmar_syllable_machine_trans_keys[] = { - 1u, 32u, 3u, 32u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, - 3u, 32u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 32u, 1u, 15u, 3u, 32u, 3u, 32u, - 3u, 29u, 3u, 29u, 3u, 29u, 3u, 30u, 3u, 29u, 3u, 32u, 3u, 32u, 3u, 32u, - 3u, 32u, 3u, 32u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, - 3u, 32u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 32u, 1u, 15u, 3u, 32u, 3u, 32u, - 3u, 32u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 30u, 3u, 29u, 3u, 32u, 3u, 32u, - 3u, 32u, 3u, 32u, 3u, 32u, 3u, 32u, 3u, 32u, 1u, 32u, 1u, 15u, 8u, 8u, + 1u, 42u, 3u, 42u, 5u, 39u, 5u, 8u, 3u, 42u, 3u, 39u, 3u, 39u, 5u, 39u, + 5u, 39u, 3u, 39u, 
3u, 39u, 3u, 42u, 5u, 39u, 1u, 15u, 3u, 39u, 3u, 39u, + 3u, 40u, 3u, 39u, 3u, 42u, 3u, 42u, 3u, 39u, 3u, 42u, 3u, 42u, 3u, 42u, + 3u, 42u, 3u, 42u, 5u, 39u, 5u, 8u, 3u, 42u, 3u, 39u, 3u, 39u, 5u, 39u, + 5u, 39u, 3u, 39u, 3u, 39u, 3u, 42u, 5u, 39u, 1u, 15u, 3u, 42u, 3u, 39u, + 3u, 39u, 3u, 40u, 3u, 39u, 3u, 42u, 3u, 42u, 3u, 39u, 3u, 42u, 3u, 42u, + 3u, 42u, 3u, 42u, 3u, 42u, 3u, 42u, 3u, 42u, 1u, 42u, 1u, 33u, 8u, 8u, 0 }; static const char _myanmar_syllable_machine_key_spans[] = { - 32, 30, 25, 4, 25, 23, 21, 21, - 30, 27, 27, 27, 30, 15, 30, 30, - 27, 27, 27, 28, 27, 30, 30, 30, - 30, 30, 25, 4, 25, 23, 21, 21, - 30, 27, 27, 27, 30, 15, 30, 30, - 30, 27, 27, 27, 28, 27, 30, 30, - 30, 30, 30, 30, 30, 32, 15, 1 + 42, 40, 35, 4, 40, 37, 37, 35, + 35, 37, 37, 40, 35, 15, 37, 37, + 38, 37, 40, 40, 37, 40, 40, 40, + 40, 40, 35, 4, 40, 37, 37, 35, + 35, 37, 37, 40, 35, 15, 40, 37, + 37, 38, 37, 40, 40, 37, 40, 40, + 40, 40, 40, 40, 40, 42, 33, 1 }; static const short _myanmar_syllable_machine_index_offsets[] = { - 0, 33, 64, 90, 95, 121, 145, 167, - 189, 220, 248, 276, 304, 335, 351, 382, - 413, 441, 469, 497, 526, 554, 585, 616, - 647, 678, 709, 735, 740, 766, 790, 812, - 834, 865, 893, 921, 949, 980, 996, 1027, - 1058, 1089, 1117, 1145, 1173, 1202, 1230, 1261, - 1292, 1323, 1354, 1385, 1416, 1447, 1480, 1496 + 0, 43, 84, 120, 125, 166, 204, 242, + 278, 314, 352, 390, 431, 467, 483, 521, + 559, 598, 636, 677, 718, 756, 797, 838, + 879, 920, 961, 997, 1002, 1043, 1081, 1119, + 1155, 1191, 1229, 1267, 1308, 1344, 1360, 1401, + 1439, 1477, 1516, 1554, 1595, 1636, 1674, 1715, + 1756, 1797, 1838, 1879, 1920, 1961, 2004, 2038 }; static const char _myanmar_syllable_machine_indicies[] = { 1, 1, 2, 3, 4, 4, 0, 5, - 6, 1, 0, 0, 0, 0, 7, 0, - 0, 8, 9, 0, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, - 0, 23, 24, 25, 25, 22, 26, 27, + 6, 1, 1, 0, 0, 0, 7, 0, + 0, 8, 0, 9, 10, 11, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 13, + 1, 0, 14, 15, 16, 17, 18, 19, + 20, 21, 0, 23, 24, 25, 25, 22, 
+ 26, 27, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 28, 29, 30, 31, 22, 22, 22, 22, 22, 22, 22, 22, - 28, 22, 22, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 22, 39, 22, - 25, 25, 22, 26, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 40, 22, 22, - 22, 22, 22, 22, 33, 22, 22, 22, - 37, 22, 25, 25, 22, 26, 22, 25, - 25, 22, 26, 22, 22, 22, 22, 22, + 32, 22, 22, 33, 34, 35, 36, 37, + 38, 22, 39, 22, 25, 25, 22, 26, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 33, 22, 22, 22, 37, - 22, 41, 22, 25, 25, 22, 26, 33, - 22, 22, 22, 22, 22, 22, 22, 22, - 42, 22, 22, 22, 22, 22, 22, 33, - 22, 25, 25, 22, 26, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 42, 22, - 22, 22, 22, 22, 22, 33, 22, 25, - 25, 22, 26, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 33, 22, 23, 22, 25, + 22, 22, 22, 22, 22, 22, 31, 22, + 22, 22, 22, 22, 22, 22, 22, 40, + 22, 22, 22, 22, 22, 22, 37, 22, + 25, 25, 22, 26, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, 22, - 22, 22, 22, 22, 43, 22, 22, 44, - 22, 22, 22, 33, 45, 22, 22, 37, - 22, 22, 43, 22, 23, 22, 25, 25, - 22, 26, 27, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 33, 22, 22, 22, 37, 22, - 23, 22, 25, 25, 22, 26, 27, 22, - 22, 22, 22, 22, 22, 22, 22, 43, - 22, 22, 22, 22, 22, 22, 33, 45, - 22, 22, 37, 22, 23, 22, 25, 25, - 22, 26, 27, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 33, 45, 22, 22, 37, 22, - 23, 22, 25, 25, 22, 26, 27, 22, - 22, 22, 22, 22, 22, 22, 22, 43, - 22, 22, 22, 22, 22, 22, 33, 45, - 22, 22, 37, 22, 22, 43, 22, 1, - 1, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 1, 22, 23, - 22, 25, 25, 22, 26, 27, 22, 22, - 22, 22, 22, 22, 22, 22, 28, 22, - 22, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 22, 22, 39, 22, 23, 22, + 22, 22, 22, 22, 22, 22, 41, 22, + 22, 31, 22, 22, 22, 22, 22, 22, + 22, 22, 42, 22, 22, 43, 22, 22, + 22, 37, 22, 22, 42, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, - 22, 22, 22, 22, 22, 46, 22, 22, - 22, 22, 22, 22, 33, 34, 35, 36, - 37, 22, 22, 39, 22, 23, 22, 25, - 
25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 33, 34, 35, 36, 37, - 22, 23, 22, 25, 25, 22, 26, 27, + 22, 22, 31, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 37, 22, 44, 22, 25, 25, + 22, 26, 37, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 45, 22, 22, 22, 22, 22, 22, + 37, 22, 25, 25, 22, 26, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 45, 22, 22, + 22, 22, 22, 22, 37, 22, 25, 25, + 22, 26, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 33, - 34, 35, 22, 37, 22, 23, 22, 25, - 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 33, 22, 35, 22, 37, - 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 33, - 34, 35, 36, 37, 46, 22, 23, 22, - 25, 25, 22, 26, 27, 22, 22, 22, - 22, 22, 22, 22, 22, 46, 22, 22, - 22, 22, 22, 22, 33, 34, 35, 36, 37, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 29, 22, 31, 22, - 33, 34, 35, 36, 37, 22, 22, 39, + 22, 22, 22, 41, 22, 22, 31, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 37, 22, + 23, 22, 25, 25, 22, 26, 27, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 41, 22, 22, 31, 22, 22, 22, + 22, 22, 22, 22, 22, 42, 22, 22, + 22, 22, 22, 22, 37, 22, 23, 22, + 25, 25, 22, 26, 27, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 41, + 22, 22, 31, 22, 22, 22, 22, 22, + 22, 22, 22, 42, 22, 22, 22, 22, + 22, 22, 37, 22, 22, 42, 22, 25, + 25, 22, 26, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 31, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 37, 22, 1, 1, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 1, 22, 23, 22, 25, 25, 22, + 26, 27, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 28, 29, 22, 31, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 37, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, 
22, 22, 22, 22, 22, - 46, 22, 22, 29, 22, 22, 22, 33, - 34, 35, 36, 37, 22, 22, 39, 22, - 23, 22, 25, 25, 22, 26, 27, 22, - 22, 22, 22, 22, 22, 22, 22, 47, - 22, 22, 29, 30, 31, 22, 33, 34, - 35, 36, 37, 22, 22, 39, 22, 23, + 22, 22, 22, 29, 22, 31, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 37, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 29, 30, 31, 22, 33, 34, 35, - 36, 37, 22, 22, 39, 22, 23, 24, + 28, 29, 30, 31, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 37, 46, 22, 23, 22, 25, 25, 22, 26, 27, 22, 22, 22, - 22, 22, 22, 22, 22, 28, 22, 22, - 29, 30, 31, 32, 33, 34, 35, 36, - 37, 22, 22, 39, 22, 49, 49, 48, - 5, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 50, 48, 48, 48, 48, 48, - 48, 14, 48, 48, 48, 18, 48, 49, - 49, 48, 5, 48, 49, 49, 48, 5, + 22, 22, 22, 22, 22, 22, 22, 28, + 29, 30, 31, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 37, 22, 23, 22, 25, 25, + 22, 26, 27, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 28, 29, 30, + 31, 22, 22, 22, 22, 22, 22, 22, + 22, 32, 22, 22, 33, 34, 35, 36, + 37, 22, 22, 39, 22, 23, 22, 25, + 25, 22, 26, 27, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 28, 29, + 30, 31, 22, 22, 22, 22, 22, 22, + 22, 22, 46, 22, 22, 22, 22, 22, + 22, 37, 22, 22, 39, 22, 23, 22, + 25, 25, 22, 26, 27, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 28, + 29, 30, 31, 22, 22, 22, 22, 22, + 22, 22, 22, 46, 22, 22, 22, 22, + 22, 22, 37, 22, 23, 22, 25, 25, + 22, 26, 27, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 28, 29, 30, + 31, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 33, 22, 35, 22, + 37, 22, 22, 39, 22, 23, 22, 25, + 25, 22, 26, 27, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 28, 29, + 30, 31, 22, 22, 22, 22, 22, 22, + 22, 22, 46, 22, 22, 33, 22, 22, + 22, 37, 22, 22, 39, 22, 23, 22, + 25, 25, 22, 26, 27, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 28, + 29, 30, 31, 22, 22, 22, 22, 22, + 22, 22, 22, 47, 22, 22, 33, 34, + 35, 22, 37, 22, 22, 39, 22, 23, + 22, 25, 25, 
22, 26, 27, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 28, 29, 30, 31, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 33, + 34, 35, 22, 37, 22, 22, 39, 22, + 23, 24, 25, 25, 22, 26, 27, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 28, 29, 30, 31, 22, 22, 22, + 22, 22, 22, 22, 22, 32, 22, 22, + 33, 34, 35, 36, 37, 22, 22, 39, + 22, 49, 49, 48, 5, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 14, 48, 48, 48, 18, 48, 51, 48, - 49, 49, 48, 5, 14, 48, 48, 48, - 48, 48, 48, 48, 48, 52, 48, 48, - 48, 48, 48, 48, 14, 48, 49, 49, - 48, 5, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 52, 48, 48, 48, 48, - 48, 48, 14, 48, 49, 49, 48, 5, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 14, 48, 2, 48, 49, 49, 48, 5, + 48, 48, 48, 12, 48, 48, 48, 48, + 48, 48, 48, 48, 50, 48, 48, 48, + 48, 48, 48, 18, 48, 49, 49, 48, + 5, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, 48, 48, - 48, 53, 48, 48, 54, 48, 48, 48, - 14, 55, 48, 48, 18, 48, 48, 53, - 48, 2, 48, 49, 49, 48, 5, 6, + 48, 48, 48, 51, 48, 48, 12, 48, + 48, 48, 48, 48, 48, 48, 48, 52, + 48, 48, 53, 48, 48, 48, 18, 48, + 48, 52, 48, 2, 48, 49, 49, 48, + 5, 6, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 12, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 18, + 48, 54, 48, 49, 49, 48, 5, 18, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 55, 48, + 48, 48, 48, 48, 48, 18, 48, 49, + 49, 48, 5, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 55, 48, 48, 48, 48, 48, + 48, 18, 48, 49, 49, 48, 5, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 18, 48, 2, + 48, 49, 49, 48, 5, 6, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 51, 48, 48, 12, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 14, 48, 48, 48, 18, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, - 48, 
48, 48, 48, 53, 48, 48, 48, - 48, 48, 48, 14, 55, 48, 48, 18, + 48, 48, 48, 48, 48, 48, 51, 48, + 48, 12, 48, 48, 48, 48, 48, 48, + 48, 48, 52, 48, 48, 48, 48, 48, + 48, 18, 48, 2, 48, 49, 49, 48, + 5, 6, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 51, 48, 48, 12, + 48, 48, 48, 48, 48, 48, 48, 48, + 52, 48, 48, 48, 48, 48, 48, 18, + 48, 48, 52, 48, 49, 49, 48, 5, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 12, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 18, 48, + 56, 56, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 56, 48, + 2, 3, 49, 49, 48, 5, 6, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 9, 10, 11, 12, 48, 48, 48, + 48, 48, 48, 48, 48, 13, 48, 48, + 14, 15, 16, 17, 18, 19, 48, 21, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 14, - 55, 48, 48, 18, 48, 2, 48, 49, - 49, 48, 5, 6, 48, 48, 48, 48, - 48, 48, 48, 48, 53, 48, 48, 48, - 48, 48, 48, 14, 55, 48, 48, 18, - 48, 48, 53, 48, 56, 56, 48, 48, + 48, 48, 9, 10, 48, 12, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 56, 48, 2, 3, 49, 49, + 48, 48, 48, 48, 48, 18, 48, 2, + 48, 49, 49, 48, 5, 6, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 10, 48, 12, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 18, 48, 2, 48, 49, + 49, 48, 5, 6, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 9, 10, + 11, 12, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 18, 57, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, - 48, 48, 48, 8, 48, 48, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, + 48, 48, 48, 48, 48, 9, 10, 11, + 12, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 18, 48, 2, 48, 49, 49, 48, 5, + 6, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 9, 10, 11, 12, 48, + 48, 48, 48, 48, 48, 48, 48, 13, + 48, 48, 14, 15, 16, 17, 18, 48, 48, 21, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, 48, - 48, 48, 8, 48, 48, 10, 11, 12, - 13, 14, 15, 16, 17, 18, 48, 48, - 21, 48, 2, 48, 49, 49, 48, 5, - 6, 48, 48, 48, 
48, 48, 48, 48, - 48, 57, 48, 48, 48, 48, 48, 48, - 14, 15, 16, 17, 18, 48, 48, 21, - 48, 2, 48, 49, 49, 48, 5, 6, + 48, 48, 48, 48, 9, 10, 11, 12, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 14, - 15, 16, 17, 18, 48, 2, 48, 49, - 49, 48, 5, 6, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 14, 15, 16, 48, 18, - 48, 2, 48, 49, 49, 48, 5, 6, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 14, - 48, 16, 48, 18, 48, 2, 48, 49, - 49, 48, 5, 6, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 14, 15, 16, 17, 18, - 57, 48, 2, 48, 49, 49, 48, 5, - 6, 48, 48, 48, 48, 48, 48, 48, - 48, 57, 48, 48, 48, 48, 48, 48, - 14, 15, 16, 17, 18, 48, 2, 48, - 49, 49, 48, 5, 6, 48, 48, 48, - 48, 48, 48, 48, 48, 48, 48, 48, - 10, 48, 12, 48, 14, 15, 16, 17, - 18, 48, 48, 21, 48, 2, 48, 49, - 49, 48, 5, 6, 48, 48, 48, 48, - 48, 48, 48, 48, 57, 48, 48, 10, - 48, 48, 48, 14, 15, 16, 17, 18, + 57, 48, 48, 48, 48, 48, 48, 18, 48, 48, 21, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, - 48, 48, 48, 58, 48, 48, 10, 11, - 12, 48, 14, 15, 16, 17, 18, 48, + 48, 48, 48, 48, 48, 9, 10, 11, + 12, 48, 48, 48, 48, 48, 48, 48, + 48, 57, 48, 48, 48, 48, 48, 48, + 18, 48, 2, 48, 49, 49, 48, 5, + 6, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 9, 10, 11, 12, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 14, 48, 16, 48, 18, 48, 48, 21, 48, 2, 48, 49, 49, 48, 5, 6, 48, 48, 48, 48, 48, 48, - 48, 48, 48, 48, 48, 10, 11, 12, - 48, 14, 15, 16, 17, 18, 48, 48, - 21, 48, 2, 3, 49, 49, 48, 5, - 6, 48, 48, 48, 48, 48, 48, 48, - 48, 8, 48, 48, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 48, 48, 21, - 48, 23, 24, 25, 25, 22, 26, 27, + 48, 48, 48, 48, 9, 10, 11, 12, + 48, 48, 48, 48, 48, 48, 48, 48, + 57, 48, 48, 14, 48, 48, 48, 18, + 48, 48, 21, 48, 2, 48, 49, 49, + 48, 5, 6, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 9, 10, 11, + 12, 48, 48, 48, 48, 48, 48, 48, + 48, 58, 48, 48, 14, 15, 16, 48, + 18, 48, 48, 21, 48, 2, 48, 49, + 49, 48, 5, 6, 48, 48, 48, 48, 
+ 48, 48, 48, 48, 48, 48, 9, 10, + 11, 12, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 14, 15, 16, + 48, 18, 48, 48, 21, 48, 2, 3, + 49, 49, 48, 5, 6, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 9, + 10, 11, 12, 48, 48, 48, 48, 48, + 48, 48, 48, 13, 48, 48, 14, 15, + 16, 17, 18, 48, 48, 21, 48, 23, + 24, 25, 25, 22, 26, 27, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 59, 22, 22, 29, 30, 31, 32, 33, + 28, 29, 30, 31, 22, 22, 22, 22, + 22, 22, 22, 22, 59, 22, 22, 33, 34, 35, 36, 37, 38, 22, 39, 22, 23, 60, 25, 25, 22, 26, 27, 22, - 22, 22, 22, 22, 22, 22, 22, 28, - 22, 22, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 22, 22, 39, 22, 1, - 1, 2, 3, 49, 49, 48, 5, 6, - 1, 48, 48, 48, 48, 1, 48, 48, - 8, 48, 48, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 48, 21, 48, - 1, 1, 61, 61, 61, 61, 61, 61, - 61, 1, 61, 61, 61, 61, 1, 61, - 62, 61, 0 + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 28, 29, 30, 31, 22, 22, 22, + 22, 22, 22, 22, 22, 32, 22, 22, + 33, 34, 35, 36, 37, 22, 22, 39, + 22, 1, 1, 2, 3, 49, 49, 48, + 5, 6, 1, 1, 48, 48, 48, 1, + 48, 48, 48, 48, 9, 10, 11, 12, + 48, 48, 48, 48, 48, 48, 48, 48, + 13, 1, 48, 14, 15, 16, 17, 18, + 19, 48, 21, 48, 1, 1, 61, 61, + 61, 61, 61, 61, 61, 1, 1, 61, + 61, 61, 1, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 1, 61, 62, 61, + 0 }; static const char _myanmar_syllable_machine_trans_targs[] = { - 0, 1, 26, 37, 0, 27, 33, 51, - 39, 54, 40, 46, 47, 48, 29, 42, - 43, 44, 32, 50, 55, 45, 0, 2, - 13, 0, 3, 9, 14, 15, 21, 22, - 23, 5, 17, 18, 19, 8, 25, 20, - 4, 6, 7, 10, 12, 11, 16, 24, - 0, 0, 28, 30, 31, 34, 36, 35, - 38, 41, 49, 52, 53, 0, 0 + 0, 1, 26, 37, 0, 27, 29, 51, + 54, 39, 40, 41, 28, 43, 44, 46, + 47, 48, 30, 50, 55, 45, 0, 2, + 13, 0, 3, 5, 14, 15, 16, 4, + 18, 19, 21, 22, 23, 6, 25, 20, + 12, 9, 10, 11, 7, 8, 17, 24, + 0, 0, 36, 33, 34, 35, 31, 32, + 38, 42, 49, 52, 53, 0, 0 }; static const char _myanmar_syllable_machine_trans_actions[] = { @@ -350,11 +431,11 @@ static const int 
myanmar_syllable_machine_error = -1; static const int myanmar_syllable_machine_en_main = 0; -#line 44 "hb-ot-shaper-myanmar-machine.rl" +#line 56 "hb-ot-shaper-myanmar-machine.rl" -#line 102 "hb-ot-shaper-myanmar-machine.rl" +#line 121 "hb-ot-shaper-myanmar-machine.rl" #define found_syllable(syllable_type) \ @@ -373,7 +454,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) int cs; hb_glyph_info_t *info = buffer->info; -#line 377 "hb-ot-shaper-myanmar-machine.hh" +#line 458 "hb-ot-shaper-myanmar-machine.hh" { cs = myanmar_syllable_machine_start; ts = 0; @@ -381,7 +462,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) act = 0; } -#line 122 "hb-ot-shaper-myanmar-machine.rl" +#line 141 "hb-ot-shaper-myanmar-machine.rl" p = 0; @@ -389,7 +470,7 @@ find_syllables_myanmar (hb_buffer_t *buffer) unsigned int syllable_serial = 1; -#line 393 "hb-ot-shaper-myanmar-machine.hh" +#line 474 "hb-ot-shaper-myanmar-machine.hh" { int _slen; int _trans; @@ -403,7 +484,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 407 "hb-ot-shaper-myanmar-machine.hh" +#line 488 "hb-ot-shaper-myanmar-machine.hh" } _keys = _myanmar_syllable_machine_trans_keys + (cs<<1); @@ -422,38 +503,38 @@ _eof_trans: switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { case 6: -#line 94 "hb-ot-shaper-myanmar-machine.rl" +#line 113 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_consonant_syllable); }} break; case 4: -#line 95 "hb-ot-shaper-myanmar-machine.rl" +#line 114 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 10: -#line 96 "hb-ot-shaper-myanmar-machine.rl" +#line 115 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_punctuation_cluster); }} break; case 8: -#line 97 "hb-ot-shaper-myanmar-machine.rl" +#line 116 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 3: -#line 98 
"hb-ot-shaper-myanmar-machine.rl" +#line 117 "hb-ot-shaper-myanmar-machine.rl" {te = p+1;{ found_syllable (myanmar_non_myanmar_cluster); }} break; case 5: -#line 94 "hb-ot-shaper-myanmar-machine.rl" +#line 113 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_consonant_syllable); }} break; case 7: -#line 97 "hb-ot-shaper-myanmar-machine.rl" +#line 116 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }} break; case 9: -#line 98 "hb-ot-shaper-myanmar-machine.rl" +#line 117 "hb-ot-shaper-myanmar-machine.rl" {te = p;p--;{ found_syllable (myanmar_non_myanmar_cluster); }} break; -#line 457 "hb-ot-shaper-myanmar-machine.hh" +#line 538 "hb-ot-shaper-myanmar-machine.hh" } _again: @@ -462,7 +543,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 466 "hb-ot-shaper-myanmar-machine.hh" +#line 547 "hb-ot-shaper-myanmar-machine.hh" } if ( ++p != pe ) @@ -478,7 +559,7 @@ _again: } -#line 130 "hb-ot-shaper-myanmar-machine.rl" +#line 149 "hb-ot-shaper-myanmar-machine.rl" } diff --git a/src/hb-ot-shaper-myanmar-machine.rl b/src/hb-ot-shaper-myanmar-machine.rl index aec05f405..234801579 100644 --- a/src/hb-ot-shaper-myanmar-machine.rl +++ b/src/hb-ot-shaper-myanmar-machine.rl @@ -29,6 +29,18 @@ #include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-shaper-indic.hh" + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + +using myanmar_category_t = ot_category_t; +using myanmar_position_t = ot_position_t; + +#define M_Cat(Cat) myanmar_syllable_machine_ex_##Cat + enum myanmar_syllable_type_t { myanmar_consonant_syllable, myanmar_punctuation_cluster, @@ -45,32 +57,39 @@ enum myanmar_syllable_type_t { %%{ -export A = 9; -export As = 18; + +# These values are replicated from indic.hh, and relisted in myanmar.cc; keep in sync. 
+ export C = 1; -export D = 10; -export D0 = 20; -export DB = 3; -export GB = 10; -export H = 4; export IV = 2; -export MH = 21; -export ML = 32; -export MR = 22; -export MW = 23; -export MY = 24; -export PT = 25; -export V = 8; -export VAbv = 26; -export VBlw = 27; -export VPre = 28; -export VPst = 29; -export VS = 30; -export ZWJ = 6; +export DB = 3; # Dot below = OT_N +export H = 4; export ZWNJ = 5; +export ZWJ = 6; +export SM = 8; # Visarga and Shan tones +export GB = 10; # = OT_PLACEHOLDER +export DOTTEDCIRCLE = 11; +export A = 9; export Ra = 15; -export P = 31; -export CS = 19; +export CS = 18; + +export VAbv = 20; +export VBlw = 21; +export VPre = 22; +export VPst = 23; + +export As = 32; # Asat +export D = 33; # Digits except zero +export D0 = 34; # Digit zero +export MH = 35; # Medial Ha +export MR = 36; # Medial Ra +export MW = 37; # Medial Wa, Shan Wa +export MY = 38; # Medial Ya, Mon Na, Mon Ma +export PT = 39; # Pwo and other tones +export VS = 40; # Variation selectors +export P = 41; # Punctuation +export ML = 42; # Medial Mon La + j = ZWJ|ZWNJ; # Joiners k = (Ra As H); # Kinzi @@ -82,11 +101,11 @@ main_vowel_group = (VPre.VS?)* VAbv* VBlw* A* (DB As?)?; post_vowel_group = VPst MH? ML? As* VAbv* A* (DB As?)?; pwo_tone_group = PT A* DB? As?; -complex_syllable_tail = As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* V* j?; +complex_syllable_tail = As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* SM* j?; syllable_tail = (H (c|IV).VS?)* (H | complex_syllable_tail); -consonant_syllable = (k|CS)? (c|IV|D|GB).VS? syllable_tail; -punctuation_cluster = P V; +consonant_syllable = (k|CS)? (c|IV|D|GB|DOTTEDCIRCLE).VS? syllable_tail; +punctuation_cluster = P SM; broken_cluster = k? VS? 
syllable_tail; other = any; diff --git a/src/hb-ot-shaper-myanmar.cc b/src/hb-ot-shaper-myanmar.cc index ecb4cf1ab..9696ced70 100644 --- a/src/hb-ot-shaper-myanmar.cc +++ b/src/hb-ot-shaper-myanmar.cc @@ -28,14 +28,51 @@ #ifndef HB_NO_OT_SHAPE -#include "hb-ot-shaper-myanmar.hh" #include "hb-ot-shaper-myanmar-machine.hh" +#include "hb-ot-shaper-indic.hh" +#include "hb-ot-layout.hh" /* * Myanmar shaper. */ + +#define M_Check(C) static_assert (OT_##C == M_Cat(C), "") + +M_Check (C); +M_Check (IV); +M_Check (DB); +M_Check (H); +M_Check (ZWNJ); +M_Check (ZWJ); +M_Check (SM); +M_Check (GB); +M_Check (DOTTEDCIRCLE); +M_Check (A); +M_Check (Ra); +M_Check (CS); + +M_Check (VAbv); +M_Check (VBlw); +M_Check (VPre); +M_Check (VPst); + +M_Check (As); +M_Check (D); +M_Check (D0); +M_Check (MH); +M_Check (MR); +M_Check (MW); +M_Check (MY); +M_Check (PT); +M_Check (VS); +M_Check (P); +M_Check (ML); + +#undef M_Check + + static const hb_tag_t myanmar_basic_features[] = { @@ -62,6 +99,40 @@ myanmar_other_features[] = HB_TAG('p','s','t','s'), }; +static inline void +set_myanmar_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + + info.myanmar_category() = (myanmar_category_t) (type & 0xFFu); +} + + +static inline bool +is_one_of_myanmar (const hb_glyph_info_t &info, unsigned int flags) +{ + /* If it ligated, all bets are off. */ + if (_hb_glyph_info_ligated (&info)) return false; + return !!(FLAG_UNSAFE (info.indic_category()) & flags); +} + +/* Note: + * + * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels + * cannot happen in a consonant syllable. The plus side however is, we can call the + * consonant syllable logic from the vowel syllable function and get it all right! + * + * Keep in sync with consonant_categories in the generator. 
*/ +#define CONSONANT_FLAGS_MYANMAR (FLAG (M_Cat(C)) | FLAG (M_Cat(CS)) | FLAG (M_Cat(Ra)) | /* FLAG (M_Cat(CM)) | */ FLAG (M_Cat(IV)) | FLAG (M_Cat(GB)) | FLAG (M_Cat(DOTTEDCIRCLE))) + +static inline bool +is_consonant_myanmar (const hb_glyph_info_t &info) +{ + return is_one_of_myanmar (info, CONSONANT_FLAGS_MYANMAR); +} + + static void setup_syllables_myanmar (const hb_ot_shape_plan_t *plan, hb_font_t *font, @@ -131,7 +202,7 @@ compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) int a = pa->myanmar_position(); int b = pb->myanmar_position(); - return a < b ? -1 : a == b ? 0 : +1; + return (int) a - (int) b; } @@ -150,9 +221,9 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, { unsigned int limit = start; if (start + 3 <= end && - info[start ].myanmar_category() == OT_Ra && - info[start+1].myanmar_category() == OT_As && - info[start+2].myanmar_category() == OT_H) + info[start ].myanmar_category() == M_Cat(Ra) && + info[start+1].myanmar_category() == M_Cat(As) && + info[start+2].myanmar_category() == M_Cat(H)) { limit += 3; base = start; @@ -164,7 +235,7 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, base = limit; for (unsigned int i = limit; i < end; i++) - if (is_consonant (info[i])) + if (is_consonant_myanmar (info[i])) { base = i; break; @@ -189,39 +260,40 @@ initial_reordering_consonant_syllable (hb_buffer_t *buffer, * Myanmar reordering! 
*/ for (; i < end; i++) { - if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */ + if (info[i].myanmar_category() == M_Cat(MR)) /* Pre-base reordering */ { info[i].myanmar_position() = POS_PRE_C; continue; } - if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */ + if (info[i].myanmar_category() == M_Cat(VPre)) /* Left matra */ { + info[i].myanmar_position() = POS_PRE_M; continue; } - if (info[i].myanmar_category() == OT_VS) + if (info[i].myanmar_category() == M_Cat(VS)) { info[i].myanmar_position() = info[i - 1].myanmar_position(); continue; } - if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw) + if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == M_Cat(VBlw)) { pos = POS_BELOW_C; info[i].myanmar_position() = pos; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A) + if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(A)) { info[i].myanmar_position() = POS_BEFORE_SUB; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw) + if (pos == POS_BELOW_C && info[i].myanmar_category() == M_Cat(VBlw)) { info[i].myanmar_position() = pos; continue; } - if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A) + if (pos == POS_BELOW_C && info[i].myanmar_category() != M_Cat(A)) { pos = POS_AFTER_SUB; info[i].myanmar_position() = pos; @@ -264,7 +336,7 @@ reorder_myanmar (const hb_ot_shape_plan_t *plan, { hb_syllabic_insert_dotted_circles (font, buffer, myanmar_broken_cluster, - OT_GB); + M_Cat(DOTTEDCIRCLE)); foreach_syllable (buffer, start, end) reorder_syllable_myanmar (plan, font->face, buffer, start, end); diff --git a/src/hb-ot-shaper-myanmar.hh b/src/hb-ot-shaper-myanmar.hh deleted file mode 100644 index 212e290a3..000000000 --- a/src/hb-ot-shaper-myanmar.hh +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright © 2018 Google, Inc. - * - * This is part of HarfBuzz, a text shaping library. 
- * - * Permission is hereby granted, without written agreement and without - * license or royalty fees, to use, copy, modify, and distribute this - * software and its documentation for any purpose, provided that the - * above copyright notice and the following two paragraphs appear in - * all copies of this software. - * - * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN - * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - * - * Google Author(s): Behdad Esfahbod - */ - -#ifndef HB_OT_SHAPER_MYANMAR_HH -#define HB_OT_SHAPER_MYANMAR_HH - -#include "hb.hh" - -#include "hb-ot-shaper-indic.hh" - - -/* buffer var allocations */ -#define myanmar_category() indic_category() /* myanmar_category_t */ -#define myanmar_position() indic_position() /* myanmar_position_t */ - - -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. 
*/ -enum myanmar_category_t { - OT_As = 18, /* Asat */ - OT_D0 = 20, /* Digit zero */ - OT_DB = OT_N, /* Dot below */ - OT_GB = OT_PLACEHOLDER, - OT_MH = 21, /* Various consonant medial types */ - OT_MR = 22, /* Various consonant medial types */ - OT_MW = 23, /* Various consonant medial types */ - OT_MY = 24, /* Various consonant medial types */ - OT_PT = 25, /* Pwo and other tones */ - //OT_VAbv = 26, - //OT_VBlw = 27, - //OT_VPre = 28, - //OT_VPst = 29, - OT_VS = 30, /* Variation selectors */ - OT_P = 31, /* Punctuation */ - OT_D = OT_GB, /* Digits except zero */ - OT_ML = 32, /* Various consonant medial types */ -}; - -using myanmar_position_t = indic_position_t; - -static inline void -set_myanmar_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - unsigned int cat = type & 0xFFu; - myanmar_position_t pos = (myanmar_position_t) (type >> 8); - - /* Myanmar - * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze - */ - if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu))) - cat = OT_VS; - - switch (u) - { - case 0x104Eu: - cat = OT_C; /* The spec says C, IndicSyllableCategory doesn't have. 
*/ - break; - - case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: - case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: - case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: - case 0x25FEu: - cat = OT_GB; - break; - - case 0x1004u: case 0x101Bu: case 0x105Au: - cat = OT_Ra; - break; - - case 0x1032u: case 0x1036u: - cat = OT_A; - break; - - case 0x1039u: - cat = OT_H; - break; - - case 0x103Au: - cat = OT_As; - break; - - case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: - case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: - case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: - case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: - case 0x1097u: case 0x1098u: case 0x1099u: - cat = OT_D; - break; - - case 0x1040u: - cat = OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ - break; - - case 0x103Eu: - cat = OT_MH; - break; - - case 0x1060u: - cat = OT_ML; - break; - - case 0x103Cu: - cat = OT_MR; - break; - - case 0x103Du: case 0x1082u: - cat = OT_MW; - break; - - case 0x103Bu: case 0x105Eu: case 0x105Fu: - cat = OT_MY; - break; - - case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: - case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: - cat = OT_PT; - break; - - case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: - case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: - case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: - cat = OT_SM; - break; - - case 0x104Au: case 0x104Bu: - cat = OT_P; - break; - - case 0xAA74u: case 0xAA75u: case 0xAA76u: - /* https://github.com/harfbuzz/harfbuzz/issues/218 */ - cat = OT_C; - break; - } - - if (cat == OT_M) - { - switch ((int) pos) - { - case POS_PRE_C: cat = (myanmar_category_t) OT_VPre; - pos = POS_PRE_M; break; - case POS_ABOVE_C: cat = (myanmar_category_t) OT_VAbv; break; - case POS_BELOW_C: cat = (myanmar_category_t) OT_VBlw; break; - case POS_POST_C: cat = (myanmar_category_t) OT_VPst; break; - } - } - - info.myanmar_category() = cat; - 
info.myanmar_position() = pos; -} - - -#endif /* HB_OT_SHAPER_MYANMAR_HH */ diff --git a/src/meson.build b/src/meson.build index e81496567..bca289dc5 100644 --- a/src/meson.build +++ b/src/meson.build @@ -148,9 +148,7 @@ hb_base_sources = files( 'hb-ot-shaper-indic.cc', 'hb-ot-shaper-indic.hh', 'hb-ot-shaper-khmer.cc', - 'hb-ot-shaper-khmer.hh', 'hb-ot-shaper-myanmar.cc', - 'hb-ot-shaper-myanmar.hh', 'hb-ot-shaper-syllabic.cc', 'hb-ot-shaper-syllabic.hh', 'hb-ot-shaper-thai.cc',