From 8eaee2f887fffb1130b41668a54cd98866f0bfb5 Mon Sep 17 00:00:00 2001 From: David Corbett Date: Thu, 7 Oct 2021 20:10:31 -0400 Subject: [PATCH] [USE] Only skip default ignorables within CCSes --- src/gen-use-table.py | 106 +++++++------ src/hb-ot-shape-complex-use-machine.hh | 55 +++---- src/hb-ot-shape-complex-use-machine.rl | 9 +- src/hb-ot-shape-complex-use-table.hh | 145 ++++++++++++------ src/update-unicode-tables.make | 16 +- .../data/in-house/tests/use-syllable.tests | 2 +- 6 files changed, 189 insertions(+), 144 deletions(-) diff --git a/src/gen-use-table.py b/src/gen-use-table.py index 118f6f2b3..d0bb68a04 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 # flake8: noqa: F821 -"""usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt +"""usage: ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt DerivedCoreProperties.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt Input files: * https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt * https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt +* https://unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt * https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt * https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt * https://unicode.org/Public/UCD/latest/ucd/Blocks.txt @@ -15,7 +16,7 @@ Input files: import sys -if len (sys.argv) != 8: +if len (sys.argv) != 9: sys.exit (__doc__) DISABLED_BLOCKS = [ @@ -26,8 +27,8 @@ DISABLED_BLOCKS = [ files = [open (x, encoding='utf-8') for x in sys.argv[1:]] -headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 2] -for j in range(5, 7): +headers = [[f.readline () for i in range (2)] for j,f in enumerate(files) if j != 3] +for j in range(6, 8): for line in files[j]: line = line.rstrip() if not line: @@ -55,22 +56,24 @@ for i, f in enumerate (files): else: end = int (uu[1], 16) - t = fields[1 if i not in [2, 3] else 2] + t = fields[1 if i not in [3, 4] else 2] - if i == 3: + if i == 2 and t != 'Default_Ignorable_Code_Point': + continue + elif i == 4: t = 'jt_' + t - elif i == 5 and t == 'Consonant_Final_Modifier': + elif i == 6 and t == 'Consonant_Final_Modifier': # TODO: https://github.com/MicrosoftDocs/typography-issues/issues/336 t = 'Syllable_Modifier' - elif i == 6 and t == 'NA': + elif i == 7 and t == 'NA': t = 'Not_Applicable' - i0 = i if i < 5 else i - 5 + i0 = i if i < 6 else i - 6 for u in range (start, end + 1): data[i0][u] = t values[i0][t] = values[i0].get (t, 0) + end - start + 1 -defaults = ('Other', 'Not_Applicable', 'Cn', 'jt_X', 'No_Block') +defaults = ('Other', 'Not_Applicable', '', 'Cn', 'jt_X', 'No_Block') # TODO Characters that are not in Unicode Indic files, but used in USE data[0][0x0640] = defaults[0] @@ -124,12 +127,12 @@ for i,v in enumerate (defaults): combined = {} for i,d in enumerate (data): for u,v in d.items (): - if i >= 2 and not u in combined: + if i >= 3 and not u in combined: continue if not u in combined: combined[u] = list (defaults) combined[u][i] = v -combined = {k:v for k,v in combined.items() if v[4] not in DISABLED_BLOCKS} +combined = {k: v for k, v in combined.items() if v[5] not in DISABLED_BLOCKS} data = combined del combined @@ -229,7 +232,7 @@ for name in property_names: globals().update(property_values) -def is_BASE(U, UISC, UGC, AJT): +def is_BASE(U, UISC, UDI, UGC, AJT): return (UISC in [Number, Consonant, Consonant_Head_Letter, Tone_Letter, Vowel_Independent, @@ -238,77 +241,80 @@ def is_BASE(U, UISC, UGC, AJT): AJT in [jt_C, jt_D, jt_L, jt_R] and UISC != Joiner or (UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial, Consonant_Subjoined, Vowel, Vowel_Dependent])) -def is_BASE_NUM(U, UISC, UGC, AJT): +def is_BASE_NUM(U, UISC, UDI, UGC, AJT): return UISC == Brahmi_Joining_Number -def is_BASE_OTHER(U, UISC, UGC, AJT): +def is_BASE_OTHER(U, UISC, UDI, UGC, AJT): if UISC == Consonant_Placeholder: return True return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE] -def is_CONS_FINAL(U, UISC, UGC, AJT): +def is_CGJ(U, UISC, UDI, UGC, AJT): + return U == 0x200D or UDI and UGC in [Mc, Me, Mn] +def is_CONS_FINAL(U, UISC, UDI, UGC, AJT): return ((UISC == Consonant_Final and UGC != Lo) or UISC == Consonant_Succeeding_Repha) -def is_CONS_FINAL_MOD(U, UISC, UGC, AJT): +def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT): return UISC == Syllable_Modifier -def is_CONS_MED(U, UISC, UGC, AJT): +def is_CONS_MED(U, UISC, UDI, UGC, AJT): # Consonant_Initial_Postfixed is new in Unicode 11; not in the spec. return (UISC == Consonant_Medial and UGC != Lo or UISC == Consonant_Initial_Postfixed) -def is_CONS_MOD(U, UISC, UGC, AJT): +def is_CONS_MOD(U, UISC, UDI, UGC, AJT): return (UISC in [Nukta, Gemination_Mark, Consonant_Killer] and - not is_SYM_MOD(U, UISC, UGC, AJT)) -def is_CONS_SUB(U, UISC, UGC, AJT): + not is_SYM_MOD(U, UISC, UDI, UGC, AJT)) +def is_CONS_SUB(U, UISC, UDI, UGC, AJT): return UISC == Consonant_Subjoined and UGC != Lo -def is_CONS_WITH_STACKER(U, UISC, UGC, AJT): +def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT): return UISC == Consonant_With_Stacker -def is_HALANT(U, UISC, UGC, AJT): +def is_HALANT(U, UISC, UDI, UGC, AJT): return (UISC in [Virama, Invisible_Stacker] - and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC, AJT) - and not is_SAKOT(U, UISC, UGC, AJT)) -def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UGC, AJT): + and not is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT) + and not is_SAKOT(U, UISC, UDI, UGC, AJT)) +def is_HALANT_OR_VOWEL_MODIFIER(U, UISC, UDI, UGC, AJT): # https://github.com/harfbuzz/harfbuzz/issues/1102 # https://github.com/harfbuzz/harfbuzz/issues/1379 return U in [0x11046, 0x1134D] -def is_HALANT_NUM(U, UISC, UGC, AJT): +def is_HALANT_NUM(U, UISC, UDI, UGC, AJT): return UISC == Number_Joiner -def is_HIEROGLYPH(U, UISC, UGC, AJT): +def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph -def is_HIEROGLYPH_JOINER(U, UISC, UGC, AJT): +def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Joiner -def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UGC, AJT): +def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Segment_Begin -def is_HIEROGLYPH_SEGMENT_END(U, UISC, UGC, AJT): +def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT): return UISC == Hieroglyph_Segment_End -def is_ZWNJ(U, UISC, UGC, AJT): +def is_ZWNJ(U, UISC, UDI, UGC, AJT): return UISC == Non_Joiner -def is_OTHER(U, UISC, UGC, AJT): +def is_OTHER(U, UISC, UDI, UGC, AJT): return ((UGC in [Cn, Po] or UISC in [Consonant_Dead, Joiner, Modifying_Letter, Other]) - and not is_BASE(U, UISC, UGC, AJT) - and not is_BASE_OTHER(U, UISC, UGC, AJT) - and not is_SYM(U, UISC, UGC, AJT) - and not is_SYM_MOD(U, UISC, UGC, AJT) + and not is_BASE(U, UISC, UDI, UGC, AJT) + and not is_BASE_OTHER(U, UISC, UDI, UGC, AJT) + and not is_CGJ(U, UISC, UDI, UGC, AJT) + and not is_SYM(U, UISC, UDI, UGC, AJT) + and not is_SYM_MOD(U, UISC, UDI, UGC, AJT) ) -def is_REPHA(U, UISC, UGC, AJT): +def is_REPHA(U, UISC, UDI, UGC, AJT): return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed] -def is_SAKOT(U, UISC, UGC, AJT): +def is_SAKOT(U, UISC, UDI, UGC, AJT): return U == 0x1A60 -def is_SYM(U, UISC, UGC, AJT): +def is_SYM(U, UISC, UDI, UGC, AJT): if U in [0x25CC, 0x1E14F]: return False return UGC in [So, Sc] and U not in [0x0F01, 0x1B62, 0x1B68] -def is_SYM_MOD(U, UISC, UGC, AJT): +def is_SYM_MOD(U, UISC, UDI, UGC, AJT): return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73] -def is_VOWEL(U, UISC, UGC, AJT): +def is_VOWEL(U, UISC, UDI, UGC, AJT): # https://github.com/harfbuzz/harfbuzz/issues/376 return (UISC == Pure_Killer or (UGC != Lo and UISC in [Vowel, Vowel_Dependent] and U not in [0xAA29])) -def is_VOWEL_MOD(U, UISC, UGC, AJT): +def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT): # https://github.com/harfbuzz/harfbuzz/issues/376 return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or (UGC != Lo and (UISC == Bindu or U in [0xAA29]))) -# CGJ, VS, WJ, and ZWJ are handled in find_syllables use_mapping = { 'B': is_BASE, 'N': is_BASE_NUM, 'GB': is_BASE_OTHER, + 'CGJ': is_CGJ, 'F': is_CONS_FINAL, 'FM': is_CONS_FINAL_MOD, 'M': is_CONS_MED, @@ -379,7 +385,7 @@ use_positions = { def map_to_use(data): out = {} items = use_mapping.items() - for U,(UISC,UIPC,UGC,AJT,UBlock) in data.items(): + for U, (UISC, UIPC, UDI, UGC, AJT, UBlock) in data.items(): # Resolve Indic_Syllabic_Category @@ -400,8 +406,8 @@ def map_to_use(data): # TODO: https://github.com/microsoft/font-tools/issues/1 if U == 0xA982: UISC = Consonant_Succeeding_Repha - values = [k for k,v in items if v(U,UISC,UGC,AJT)] - assert len(values) == 1, "%s %s %s %s %s" % (hex(U), UISC, UGC, AJT, values) + values = [k for k,v in items if v(U, UISC, UDI, UGC, AJT)] + assert len(values) == 1, "%s %s %s %s %s %s" % (hex(U), UISC, UDI, UGC, AJT, values) USE = values[0] # Resolve Indic_Positional_Category @@ -422,12 +428,12 @@ def map_to_use(data): if 0x11131 <= U <= 0x11132: UIPC = Top assert (UIPC in [Not_Applicable, Visual_Order_Left] or U == 0x0F7F or - USE in use_positions), "%s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, AJT) + USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT) pos_mapping = use_positions.get(USE, None) if pos_mapping: values = [k for k,v in pos_mapping.items() if v and UIPC in v] - assert len(values) == 1, "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UGC, AJT, values) + assert len(values) == 1, "%s %s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT, values) USE = USE + values[0] out[U] = (USE, UBlock) @@ -440,7 +446,7 @@ print ("/* == Start of generated table == */") print ("/*") print (" * The following table is generated by running:") print (" *") -print (" * {} IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt".format (sys.argv[0])) +print (" * {} IndicSyllabicCategory.txt IndicPositionalCategory.txt DerivedCoreProperties.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt".format (sys.argv[0])) print (" *") print (" * on files with these headers:") print (" *") diff --git a/src/hb-ot-shape-complex-use-machine.hh b/src/hb-ot-shape-complex-use-machine.hh index bb046a72e..f753aa038 100644 --- a/src/hb-ot-shape-complex-use-machine.hh +++ b/src/hb-ot-shape-complex-use-machine.hh @@ -56,6 +56,7 @@ enum use_syllable_type_t { #line 58 "hb-ot-shape-complex-use-machine.hh" #define use_syllable_machine_ex_B 1u +#define use_syllable_machine_ex_CGJ 6u #define use_syllable_machine_ex_CMAbv 31u #define use_syllable_machine_ex_CMBlw 32u #define use_syllable_machine_ex_CS 43u @@ -96,7 +97,7 @@ enum use_syllable_type_t { #define use_syllable_machine_ex_ZWNJ 14u -#line 100 "hb-ot-shape-complex-use-machine.hh" +#line 101 "hb-ot-shape-complex-use-machine.hh" static const unsigned char _use_syllable_machine_trans_keys[] = { 1u, 1u, 1u, 1u, 0u, 51u, 11u, 48u, 11u, 48u, 1u, 1u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, @@ -369,7 +370,7 @@ static const int use_syllable_machine_en_main = 2; -#line 176 "hb-ot-shape-complex-use-machine.rl" +#line 177 "hb-ot-shape-complex-use-machine.rl" #define found_syllable(syllable_type) \ @@ -422,8 +423,8 @@ HB_FUNCOBJ (machine_index); static bool -not_standard_default_ignorable (const hb_glyph_info_t &i) -{ return !(i.use_category() == USE(O) && _hb_glyph_info_is_default_ignorable (&i)); } +not_ccs_default_ignorable (const hb_glyph_info_t &i) +{ return !(i.use_category() == USE(CGJ) && _hb_glyph_info_is_default_ignorable (&i)); } static inline void find_syllables_use (hb_buffer_t *buffer) @@ -432,13 +433,13 @@ find_syllables_use (hb_buffer_t *buffer) auto p = + hb_iter (info, buffer->len) | hb_enumerate - | hb_filter ([] (const hb_glyph_info_t &i) { return not_standard_default_ignorable (i); }, + | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); }, hb_second) | hb_filter ([&] (const hb_pair_t p) { if (p.second.use_category() == USE(ZWNJ)) for (unsigned i = p.first + 1; i < buffer->len; ++i) - if (not_standard_default_ignorable (info[i])) + if (not_ccs_default_ignorable (info[i])) return !_hb_glyph_info_is_unicode_mark (&info[i]); return true; }) @@ -452,7 +453,7 @@ find_syllables_use (hb_buffer_t *buffer) unsigned int act HB_UNUSED; int cs; -#line 456 "hb-ot-shape-complex-use-machine.hh" +#line 457 "hb-ot-shape-complex-use-machine.hh" { cs = use_syllable_machine_start; ts = 0; @@ -460,12 +461,12 @@ find_syllables_use (hb_buffer_t *buffer) act = 0; } -#line 260 "hb-ot-shape-complex-use-machine.rl" +#line 261 "hb-ot-shape-complex-use-machine.rl" unsigned int syllable_serial = 1; -#line 469 "hb-ot-shape-complex-use-machine.hh" +#line 470 "hb-ot-shape-complex-use-machine.hh" { int _slen; int _trans; @@ -479,7 +480,7 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 483 "hb-ot-shape-complex-use-machine.hh" +#line 484 "hb-ot-shape-complex-use-machine.hh" } _keys = _use_syllable_machine_trans_keys + (cs<<1); @@ -502,62 +503,62 @@ _eof_trans: {te = p+1;} break; case 5: -#line 163 "hb-ot-shape-complex-use-machine.rl" +#line 164 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (use_independent_cluster); }} break; case 9: -#line 166 "hb-ot-shape-complex-use-machine.rl" +#line 167 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (use_standard_cluster); }} break; case 7: -#line 171 "hb-ot-shape-complex-use-machine.rl" +#line 172 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (use_broken_cluster); }} break; case 6: -#line 172 "hb-ot-shape-complex-use-machine.rl" +#line 173 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (use_non_cluster); }} break; case 10: -#line 164 "hb-ot-shape-complex-use-machine.rl" +#line 165 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_virama_terminated_cluster); }} break; case 11: -#line 165 "hb-ot-shape-complex-use-machine.rl" +#line 166 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_sakot_terminated_cluster); }} break; case 8: -#line 166 "hb-ot-shape-complex-use-machine.rl" +#line 167 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_standard_cluster); }} break; case 13: -#line 167 "hb-ot-shape-complex-use-machine.rl" +#line 168 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_number_joiner_terminated_cluster); }} break; case 12: -#line 168 "hb-ot-shape-complex-use-machine.rl" +#line 169 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_numeral_cluster); }} break; case 14: -#line 169 "hb-ot-shape-complex-use-machine.rl" +#line 170 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_symbol_cluster); }} break; case 17: -#line 170 "hb-ot-shape-complex-use-machine.rl" +#line 171 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_hieroglyph_cluster); }} break; case 15: -#line 171 "hb-ot-shape-complex-use-machine.rl" +#line 172 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_broken_cluster); }} break; case 16: -#line 172 "hb-ot-shape-complex-use-machine.rl" +#line 173 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (use_non_cluster); }} break; case 1: -#line 171 "hb-ot-shape-complex-use-machine.rl" +#line 172 "hb-ot-shape-complex-use-machine.rl" {{p = ((te))-1;}{ found_syllable (use_broken_cluster); }} break; -#line 561 "hb-ot-shape-complex-use-machine.hh" +#line 562 "hb-ot-shape-complex-use-machine.hh" } _again: @@ -566,7 +567,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 570 "hb-ot-shape-complex-use-machine.hh" +#line 571 "hb-ot-shape-complex-use-machine.hh" } if ( ++p != pe ) @@ -582,7 +583,7 @@ _again: } -#line 265 "hb-ot-shape-complex-use-machine.rl" +#line 266 "hb-ot-shape-complex-use-machine.rl" } diff --git a/src/hb-ot-shape-complex-use-machine.rl b/src/hb-ot-shape-complex-use-machine.rl index 00d82b4b4..ea7531f7d 100644 --- a/src/hb-ot-shape-complex-use-machine.rl +++ b/src/hb-ot-shape-complex-use-machine.rl @@ -68,6 +68,7 @@ export O = 0; # OTHER export B = 1; # BASE export N = 4; # BASE_NUM export GB = 5; # BASE_OTHER +export CGJ = 6; # CGJ export SUB = 11; # CONS_SUB export H = 12; # HALANT @@ -225,8 +226,8 @@ HB_FUNCOBJ (machine_index); static bool -not_standard_default_ignorable (const hb_glyph_info_t &i) -{ return !(i.use_category() == USE(O) && _hb_glyph_info_is_default_ignorable (&i)); } +not_ccs_default_ignorable (const hb_glyph_info_t &i) +{ return !(i.use_category() == USE(CGJ) && _hb_glyph_info_is_default_ignorable (&i)); } static inline void find_syllables_use (hb_buffer_t *buffer) @@ -235,13 +236,13 @@ find_syllables_use (hb_buffer_t *buffer) auto p = + hb_iter (info, buffer->len) | hb_enumerate - | hb_filter ([] (const hb_glyph_info_t &i) { return not_standard_default_ignorable (i); }, + | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); }, hb_second) | hb_filter ([&] (const hb_pair_t p) { if (p.second.use_category() == USE(ZWNJ)) for (unsigned i = p.first + 1; i < buffer->len; ++i) - if (not_standard_default_ignorable (info[i])) + if (not_ccs_default_ignorable (info[i])) return !_hb_glyph_info_is_unicode_mark (&info[i]); return true; }) diff --git a/src/hb-ot-shape-complex-use-table.hh b/src/hb-ot-shape-complex-use-table.hh index fbff07865..dd00bc806 100644 --- a/src/hb-ot-shape-complex-use-table.hh +++ b/src/hb-ot-shape-complex-use-table.hh @@ -2,7 +2,7 @@ /* * The following table is generated by running: * - * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt + * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt DerivedCoreProperties.txt UnicodeData.txt ArabicShaping.txt Blocks.txt IndicSyllabicCategory-Additional.txt IndicPositionalCategory-Additional.txt * * on files with these headers: * @@ -10,6 +10,8 @@ * # Date: 2021-05-22, 01:01:00 GMT [KW, RP] * # IndicPositionalCategory-14.0.0.txt * # Date: 2021-05-22, 01:01:00 GMT [KW, RP] + * # DerivedCoreProperties-14.0.0.txt + * # Date: 2021-08-12, 23:12:53 GMT * # ArabicShaping-14.0.0.txt * # Date: 2021-05-21, 01:54:00 GMT [KW, RP] * # Blocks-14.0.0.txt @@ -43,6 +45,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-macros" #define B USE(B) /* BASE */ +#define CGJ USE(CGJ) /* CGJ */ #define CS USE(CS) /* CONS_WITH_STACKER */ #define G USE(G) /* HIEROGLYPH */ #define GB USE(GB) /* BASE_OTHER */ @@ -103,14 +106,20 @@ static const uint8_t use_table[] = { /* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 00D0 */ O, O, O, O, O, O, O, GB, -#define use_offset_0x0640u 80 +#define use_offset_0x0348u 80 + + + /* Combining Diacritical Marks */ + O, O, O, O, O, O, O, CGJ, + +#define use_offset_0x0640u 88 /* Arabic */ /* 0640 */ B, O, O, O, O, O, O, O, -#define use_offset_0x07c8u 88 +#define use_offset_0x07c8u 96 /* NKo */ @@ -119,7 +128,7 @@ static const uint8_t use_table[] = { /* 07E0 */ B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, /* 07F0 */ VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, O, O, O, B, O, O, VMAbv, O, O, -#define use_offset_0x0840u 144 +#define use_offset_0x0840u 152 /* Mandaic */ @@ -127,7 +136,7 @@ static const uint8_t use_table[] = { /* 0840 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 0850 */ B, B, B, B, B, B, B, B, B, CMBlw, CMBlw, CMBlw, O, O, O, O, -#define use_offset_0x0900u 176 +#define use_offset_0x0900u 184 /* Devanagari */ @@ -240,7 +249,7 @@ static const uint8_t use_table[] = { /* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, /* 0DF0 */ O, O, VPst, VPst, O, O, O, O, -#define use_offset_0x0f00u 1448 +#define use_offset_0x0f00u 1456 /* Tibetan */ @@ -259,7 +268,7 @@ static const uint8_t use_table[] = { /* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O, /* 0FC0 */ O, O, O, O, O, O, FBlw, O, -#define use_offset_0x1000u 1648 +#define use_offset_0x1000u 1656 /* Myanmar */ @@ -275,7 +284,7 @@ static const uint8_t use_table[] = { /* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst, /* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O, -#define use_offset_0x1700u 1808 +#define use_offset_0x1700u 1816 /* Tagalog */ @@ -303,7 +312,7 @@ static const uint8_t use_table[] = { /* 1780 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1790 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 17B0 */ B, B, B, B, O, O, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPre, VPre, + /* 17B0 */ B, B, B, B, CGJ, CGJ, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPre, VPre, /* 17C0 */ VPre, VPre, VPre, VPre, VPre, VPre, VMAbv, VMPst, VPst, VMAbv, VMAbv, FMAbv, FAbv, CMAbv, FMAbv, VMAbv, /* 17D0 */ FMAbv, VAbv, H, FMAbv, O, O, O, O, O, O, O, O, B, FMAbv, O, O, /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, @@ -311,7 +320,7 @@ static const uint8_t use_table[] = { /* Mongolian */ - /* 1800 */ B, O, O, O, O, O, O, B, O, O, B, O, O, O, O, O, + /* 1800 */ B, O, O, O, O, O, O, B, O, O, B, CGJ, CGJ, CGJ, O, CGJ, /* 1810 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 1820 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1830 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, @@ -323,7 +332,7 @@ static const uint8_t use_table[] = { /* 1890 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 18A0 */ B, B, B, B, B, B, B, B, B, CMBlw, B, O, O, O, O, O, -#define use_offset_0x1900u 2240 +#define use_offset_0x1900u 2248 /* Limbu */ @@ -367,7 +376,7 @@ static const uint8_t use_table[] = { /* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, /* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1b00u 2656 +#define use_offset_0x1b00u 2664 /* Balinese */ @@ -403,7 +412,7 @@ static const uint8_t use_table[] = { /* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FMAbv, CMBlw, O, O, O, O, O, O, O, O, /* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B, -#define use_offset_0x1cd0u 2992 +#define use_offset_0x1cd0u 3000 /* Vedic Extensions */ @@ -412,20 +421,20 @@ static const uint8_t use_table[] = { /* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O, /* 1CF0 */ O, O, O, O, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, GB, O, O, O, O, O, -#define use_offset_0x1df8u 3040 +#define use_offset_0x1df8u 3048 /* Combining Diacritical Marks Supplement */ O, O, O, FMAbv, O, O, O, O, -#define use_offset_0x2008u 3048 +#define use_offset_0x2008u 3056 /* General Punctuation */ - O, O, O, O, ZWNJ, O, O, O, + O, O, O, O, ZWNJ, CGJ, O, O, /* 2010 */ GB, GB, GB, GB, GB, O, O, O, -#define use_offset_0x2070u 3064 +#define use_offset_0x2070u 3072 /* Superscripts and Subscripts */ @@ -433,20 +442,20 @@ static const uint8_t use_table[] = { /* 2070 */ O, O, O, O, FMPst, O, O, O, O, O, O, O, O, O, O, O, /* 2080 */ O, O, FMPst, FMPst, FMPst, O, O, O, -#define use_offset_0x20f0u 3088 +#define use_offset_0x20f0u 3096 /* Combining Diacritical Marks for Symbols */ /* 20F0 */ VMAbv, O, O, O, O, O, O, O, -#define use_offset_0x25c8u 3096 +#define use_offset_0x25c8u 3104 /* Geometric Shapes */ O, O, O, O, B, O, O, O, -#define use_offset_0x2d30u 3104 +#define use_offset_0x2d30u 3112 /* Tifinagh */ @@ -457,7 +466,7 @@ static const uint8_t use_table[] = { /* 2D60 */ B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, B, /* 2D70 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, H, -#define use_offset_0xa800u 3184 +#define use_offset_0xa800u 3192 /* Syloti Nagri */ @@ -544,7 +553,7 @@ static const uint8_t use_table[] = { /* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst, /* AAF0 */ O, O, O, O, O, VMPst, H, O, -#define use_offset_0xabc0u 3944 +#define use_offset_0xabc0u 3952 /* Meetei Mayek */ @@ -554,7 +563,14 @@ static const uint8_t use_table[] = { /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O, /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x10570u 4008 +#define use_offset_0xfe00u 4016 + + + /* Variation Selectors */ + + /* FE00 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + +#define use_offset_0x10570u 4032 /* Vithkuqi */ @@ -565,7 +581,7 @@ static const uint8_t use_table[] = { /* 105A0 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 105B0 */ B, B, O, B, B, B, B, B, B, B, O, B, B, O, O, O, -#define use_offset_0x10a00u 4088 +#define use_offset_0x10a00u 4112 /* Kharoshthi */ @@ -576,7 +592,7 @@ static const uint8_t use_table[] = { /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H, /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, -#define use_offset_0x10ac0u 4168 +#define use_offset_0x10ac0u 4192 /* Manichaean */ @@ -585,7 +601,7 @@ static const uint8_t use_table[] = { /* 10AD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 10AE0 */ B, B, B, B, B, CMBlw, CMBlw, O, -#define use_offset_0x10b80u 4208 +#define use_offset_0x10b80u 4232 /* Psalter Pahlavi */ @@ -594,7 +610,7 @@ static const uint8_t use_table[] = { /* 10B90 */ B, B, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 10BA0 */ O, O, O, O, O, O, O, O, O, B, B, B, B, B, B, O, -#define use_offset_0x10d00u 4256 +#define use_offset_0x10d00u 4280 /* Hanifi Rohingya */ @@ -604,7 +620,7 @@ static const uint8_t use_table[] = { /* 10D20 */ B, B, B, B, VMAbv, VMAbv, VMAbv, CMAbv, O, O, O, O, O, O, O, O, /* 10D30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x10e80u 4320 +#define use_offset_0x10e80u 4344 /* Yezidi */ @@ -614,7 +630,7 @@ static const uint8_t use_table[] = { /* 10EA0 */ B, B, B, B, B, B, B, B, B, B, O, VAbv, VAbv, O, O, O, /* 10EB0 */ B, B, O, O, O, O, O, O, -#define use_offset_0x10f30u 4376 +#define use_offset_0x10f30u 4400 /* Sogdian */ @@ -658,7 +674,7 @@ static const uint8_t use_table[] = { /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O, /* 110C0 */ O, O, VBlw, O, O, O, O, O, -#define use_offset_0x11100u 4784 +#define use_offset_0x11100u 4808 /* Chakma */ @@ -696,7 +712,7 @@ static const uint8_t use_table[] = { /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw, /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O, -#define use_offset_0x11280u 5104 +#define use_offset_0x11280u 5128 /* Multani */ @@ -724,7 +740,7 @@ static const uint8_t use_table[] = { /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, -#define use_offset_0x11400u 5352 +#define use_offset_0x11400u 5376 /* Newa */ @@ -747,7 +763,7 @@ static const uint8_t use_table[] = { /* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O, /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11580u 5576 +#define use_offset_0x11580u 5600 /* Siddham */ @@ -791,7 +807,7 @@ static const uint8_t use_table[] = { /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, /* 11740 */ B, B, B, B, B, B, B, O, -#define use_offset_0x11800u 6032 +#define use_offset_0x11800u 6056 /* Dogra */ @@ -801,7 +817,7 @@ static const uint8_t use_table[] = { /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw, /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11900u 6096 +#define use_offset_0x11900u 6120 /* Dives Akuru */ @@ -813,7 +829,7 @@ static const uint8_t use_table[] = { /* 11940 */ MPst, R, MPst, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, /* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x119a0u 6192 +#define use_offset_0x119a0u 6216 /* Nandinagari */ @@ -841,7 +857,7 @@ static const uint8_t use_table[] = { /* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O, -#define use_offset_0x11c00u 6448 +#define use_offset_0x11c00u 6472 /* Bhaiksuki */ @@ -862,7 +878,7 @@ static const uint8_t use_table[] = { /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O, -#define use_offset_0x11d00u 6632 +#define use_offset_0x11d00u 6656 /* Masaram Gondi */ @@ -882,7 +898,7 @@ static const uint8_t use_table[] = { /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O, /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11ee0u 6808 +#define use_offset_0x11ee0u 6832 /* Makasar */ @@ -890,7 +906,7 @@ static const uint8_t use_table[] = { /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O, -#define use_offset_0x13000u 6832 +#define use_offset_0x13000u 6856 /* Egyptian Hieroglyphs */ @@ -967,7 +983,7 @@ static const uint8_t use_table[] = { /* 13430 */ H, H, H, H, H, H, H, B, B, O, O, O, O, O, O, O, -#define use_offset_0x16ac0u 7920 +#define use_offset_0x16ac0u 7944 /* Tangsa */ @@ -984,7 +1000,7 @@ static const uint8_t use_table[] = { /* 16B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 16B30 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, -#define use_offset_0x16f00u 8040 +#define use_offset_0x16f00u 8064 /* Miao */ @@ -1000,14 +1016,14 @@ static const uint8_t use_table[] = { /* 16F80 */ VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, O, O, O, O, O, O, O, VMBlw, /* 16F90 */ VMBlw, VMBlw, VMBlw, O, O, O, O, O, -#define use_offset_0x16fe0u 8192 +#define use_offset_0x16fe0u 8216 /* Ideographic Symbols and Punctuation */ /* 16FE0 */ O, O, O, O, B, O, O, O, -#define use_offset_0x18b00u 8200 +#define use_offset_0x18b00u 8224 /* Khitan Small Script */ @@ -1043,7 +1059,7 @@ static const uint8_t use_table[] = { /* 18CC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 18CD0 */ B, B, B, B, B, B, O, O, -#define use_offset_0x1bc00u 8672 +#define use_offset_0x1bc00u 8696 /* Duployan */ @@ -1059,7 +1075,7 @@ static const uint8_t use_table[] = { /* 1BC80 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, /* 1BC90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, CMBlw, CMBlw, O, -#define use_offset_0x1e100u 8832 +#define use_offset_0x1e100u 8856 /* Nyiakeng Puachue Hmong */ @@ -1070,7 +1086,7 @@ static const uint8_t use_table[] = { /* 1E130 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, O, O, /* 1E140 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, B, B, -#define use_offset_0x1e290u 8912 +#define use_offset_0x1e290u 8936 /* Toto */ @@ -1086,7 +1102,7 @@ static const uint8_t use_table[] = { /* 1E2E0 */ B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMAbv, VMAbv, VMAbv, /* 1E2F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1e900u 9024 +#define use_offset_0x1e900u 9048 /* Adlam */ @@ -1098,7 +1114,28 @@ static const uint8_t use_table[] = { /* 1E940 */ B, B, B, B, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, CMAbv, B, O, O, O, O, /* 1E950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -}; /* Table items: 9120; occupancy: 78% */ +#define use_offset_0xe0100u 9144 + + + /* Variation Selectors Supplement */ + + /* E0100 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0110 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0120 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0130 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0140 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0150 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0160 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0170 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0180 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E0190 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01A0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01B0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01C0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01D0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + /* E01E0 */ CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, CGJ, + +}; /* Table items: 9384; occupancy: 78% */ static inline uint8_t hb_use_get_category (hb_codepoint_t u) @@ -1108,6 +1145,7 @@ hb_use_get_category (hb_codepoint_t u) case 0x0u: if (hb_in_range (u, 0x0028u, 0x003Fu)) return use_table[u - 0x0028u + use_offset_0x0028u]; if (hb_in_range (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u]; + if (hb_in_range (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u]; if (hb_in_range (u, 0x0640u, 0x0647u)) return use_table[u - 0x0640u + use_offset_0x0640u]; if (hb_in_range (u, 0x07C8u, 0x07FFu)) return use_table[u - 0x07C8u + use_offset_0x07c8u]; if (hb_in_range (u, 0x0840u, 0x085Fu)) return use_table[u - 0x0840u + use_offset_0x0840u]; @@ -1137,6 +1175,10 @@ hb_use_get_category (hb_codepoint_t u) if (hb_in_range (u, 0xABC0u, 0xABFFu)) return use_table[u - 0xABC0u + use_offset_0xabc0u]; break; + case 0xFu: + if (hb_in_range (u, 0xFE00u, 0xFE0Fu)) return use_table[u - 0xFE00u + use_offset_0xfe00u]; + break; + case 0x10u: if (hb_in_range (u, 0x10570u, 0x105BFu)) return use_table[u - 0x10570u + use_offset_0x10570u]; if (hb_in_range (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u]; @@ -1185,6 +1227,10 @@ hb_use_get_category (hb_codepoint_t u) if (hb_in_range (u, 0x1E900u, 0x1E95Fu)) return use_table[u - 0x1E900u + use_offset_0x1e900u]; break; + case 0xE0u: + if (hb_in_range (u, 0xE0100u, 0xE01EFu)) return use_table[u - 0xE0100u + use_offset_0xe0100u]; + break; + default: break; } @@ -1192,6 +1238,7 @@ hb_use_get_category (hb_codepoint_t u) } #undef B +#undef CGJ #undef CS #undef G #undef GB diff --git a/src/update-unicode-tables.make b/src/update-unicode-tables.make index f31b34e7c..23d595d28 100755 --- a/src/update-unicode-tables.make +++ b/src/update-unicode-tables.make @@ -21,7 +21,7 @@ hb-ot-tag-table.hh: gen-tag-table.py languagetags language-subtag-registry ./$^ > $@ || ($(RM) $@; false) hb-ucd-table.hh: gen-ucd-table.py ucd.nounihan.grouped.zip hb-common.h ./$^ > $@ || ($(RM) $@; false) -hb-ot-shape-complex-use-table.hh: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt ArabicShaping.txt Blocks.txt ms-use/IndicSyllabicCategory-Additional.txt ms-use/IndicPositionalCategory-Additional.txt +hb-ot-shape-complex-use-table.hh: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt DerivedCoreProperties.txt UnicodeData.txt ArabicShaping.txt Blocks.txt ms-use/IndicSyllabicCategory-Additional.txt ms-use/IndicPositionalCategory-Additional.txt ./$^ > $@ || ($(RM) $@; false) hb-ot-shape-complex-vowel-constraints.cc: gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt ./$^ > $@ || ($(RM) $@; false) @@ -29,28 +29,18 @@ hb-ot-shape-complex-vowel-constraints.cc: gen-vowel-constraints.py ms-use/IndicS packtab: /usr/bin/env python3 -c "import packTab" 2>/dev/null || /usr/bin/env python3 -m pip install git+https://github.com/harfbuzz/packtab -ArabicShaping.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt -UnicodeData.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/UnicodeData.txt -Blocks.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/Blocks.txt +ArabicShaping.txt DerivedCoreProperties.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt Scripts.txt UnicodeData.txt: + curl -O https://unicode.org/Public/UCD/latest/ucd/$@ emoji-data.txt: curl -O https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt emoji-test.txt: curl -O https://www.unicode.org/Public/emoji/latest/emoji-test.txt -IndicSyllabicCategory.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt -IndicPositionalCategory.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/IndicPositionalCategory.txt languagetags: curl -O https://docs.microsoft.com/en-us/typography/opentype/spec/languagetags language-subtag-registry: curl -O https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry ucd.nounihan.grouped.zip: curl -O https://unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip -Scripts.txt: - curl -O https://unicode.org/Public/UCD/latest/ucd/Scripts.txt clean: $(RM) \ diff --git a/test/shape/data/in-house/tests/use-syllable.tests b/test/shape/data/in-house/tests/use-syllable.tests index a0f0be884..1cc52fd4f 100644 --- a/test/shape/data/in-house/tests/use-syllable.tests +++ b/test/shape/data/in-house/tests/use-syllable.tests @@ -19,4 +19,4 @@ ../fonts/573d3a3177c9a8646e94c8a0d7b224334340946a.ttf;--font-funcs=ft;U+11410,U+200C,U+11442,U+034F,U+11411;[Ga.icd=0+367|Gha.diag=1@100,0+386] ../fonts/e68a88939e0f06e34d2bc911f09b70890289c8fd.ttf;;U+AA00,U+200C,U+AA34;[raMedial_cham_pre=0+400|a_cham=0+1121] ../fonts/2a670df15b73a5dc75a5cc491bde5ac93c5077dc.ttf;;U+11124,U+200D,U+11127;[u11124=0+514|u11127=0+0] -../fonts/2a670df15b73a5dc75a5cc491bde5ac93c5077dc.ttf;;U+11124,U+2060,U+11127;[u11124=0+514|u11127=1+0] +../fonts/2a670df15b73a5dc75a5cc491bde5ac93c5077dc.ttf;;U+11124,U+2060,U+11127;[u11124=0+514|uni25CC=1+547|u11127=1+0]