From 6a38adeb577444cd164eeb6ace70741fe1a33791 Mon Sep 17 00:00:00 2001 From: David Corbett Date: Fri, 3 May 2019 14:16:50 -0400 Subject: [PATCH] [use] Skip most default ignorables when clustering --- src/Makefile.sources | 1 + src/gen-use-table.py | 12 +- src/hb-ot-shape-complex-machine-index.hh | 69 +++ src/hb-ot-shape-complex-use-machine.hh | 594 +++++++++++------------ src/hb-ot-shape-complex-use-machine.rl | 55 ++- src/hb-ot-shape-complex-use-table.cc | 76 ++- src/hb-ot-shape-complex-use.hh | 2 - 7 files changed, 423 insertions(+), 386 deletions(-) create mode 100644 src/hb-ot-shape-complex-machine-index.hh diff --git a/src/Makefile.sources b/src/Makefile.sources index 8bb291206..f7d458704 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -118,6 +118,7 @@ HB_BASE_sources = \ hb-ot-shape-complex-indic.hh \ hb-ot-shape-complex-khmer.cc \ hb-ot-shape-complex-khmer.hh \ + hb-ot-shape-complex-machine-index.hh \ hb-ot-shape-complex-myanmar.cc \ hb-ot-shape-complex-myanmar.hh \ hb-ot-shape-complex-thai.cc \ diff --git a/src/gen-use-table.py b/src/gen-use-table.py index e4b48353f..71f0ec901 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -51,7 +51,6 @@ for i, f in enumerate (files): defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block') # TODO Characters that are not in Unicode Indic files, but used in USE -data[0][0x034F] = defaults[0] data[0][0x1B61] = defaults[0] data[0][0x1B63] = defaults[0] data[0][0x1B64] = defaults[0] @@ -72,8 +71,6 @@ data[0][0x11C44] = 'Consonant_Placeholder' data[0][0x11C45] = 'Consonant_Placeholder' # TODO https://github.com/harfbuzz/harfbuzz/pull/1399 data[0][0x111C8] = 'Consonant_Placeholder' -for u in range (0xFE00, 0xFE0F + 1): - data[0][u] = defaults[0] # Merge data into one dict: for i,v in enumerate (defaults): @@ -194,8 +191,6 @@ def is_BASE_OTHER(U, UISC, UGC): if UISC == Consonant_Placeholder: return True #SPEC-DRAFT #SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE] return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE] -def is_CGJ(U, UISC, UGC): - return U == 0x034F def is_CONS_FINAL(U, UISC, UGC): return ((UISC == Consonant_Final and UGC != Lo) or UISC == Consonant_Succeeding_Repha) @@ -234,9 +229,7 @@ def is_OTHER(U, UISC, UGC): return (UISC == Other and not is_SYM(U, UISC, UGC) and not is_SYM_MOD(U, UISC, UGC) - and not is_CGJ(U, UISC, UGC) and not is_Word_Joiner(U, UISC, UGC) - and not is_VARIATION_SELECTOR(U, UISC, UGC) ) def is_Reserved(U, UISC, UGC): return UGC == 'Cn' @@ -250,8 +243,6 @@ def is_SYM(U, UISC, UGC): return UGC in [So, Sc] and U not in [0x1B62, 0x1B68] def is_SYM_MOD(U, UISC, UGC): return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73] -def is_VARIATION_SELECTOR(U, UISC, UGC): - return 0xFE00 <= U <= 0xFE0F def is_VOWEL(U, UISC, UGC): # https://github.com/harfbuzz/harfbuzz/issues/376 return (UISC == Pure_Killer or @@ -261,12 +252,12 @@ def is_VOWEL_MOD(U, UISC, UGC): return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or (UGC != Lo and (UISC == Bindu or U in [0xAA29]))) +# CGJ and VS are handled in find_syllables use_mapping = { 'B': is_BASE, 'IND': is_BASE_IND, 'N': is_BASE_NUM, 'GB': is_BASE_OTHER, - 'CGJ': is_CGJ, 'F': is_CONS_FINAL, 'FM': is_CONS_FINAL_MOD, 'M': is_CONS_MED, @@ -285,7 +276,6 @@ use_mapping = { 'S': is_SYM, 'Sk': is_SAKOT, 'SM': is_SYM_MOD, - 'VS': is_VARIATION_SELECTOR, 'V': is_VOWEL, 'VM': is_VOWEL_MOD, } diff --git a/src/hb-ot-shape-complex-machine-index.hh b/src/hb-ot-shape-complex-machine-index.hh new file mode 100644 index 000000000..9ec1f3eb7 --- /dev/null +++ b/src/hb-ot-shape-complex-machine-index.hh @@ -0,0 +1,69 @@ +/* + * Copyright © 2019,2020 David Corbett + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH +#define HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH + +#include "hb.hh" + + +template +struct machine_index_t : + hb_iter_with_fallback_t, + typename Iter::item_t> +{ + machine_index_t (const Iter& it) : it (it) {} + machine_index_t (const machine_index_t& o) : it (o.it) {} + + static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator; + static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator; + + typename Iter::item_t __item__ () const { return *it; } + typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; } + unsigned __len__ () const { return it.len (); } + void __next__ () { ++it; } + void __forward__ (unsigned n) { it += n; } + void __prev__ () { --it; } + void __rewind__ (unsigned n) { it -= n; } + void operator = (unsigned n) + { unsigned index = (*it).first; if (index < n) it += n - index; else if (index > n) it -= index - n; } + void operator = (const machine_index_t& o) { *this = (*o.it).first; } + bool operator == (const machine_index_t& o) const { return (*it).first == (*o.it).first; } + bool operator != (const machine_index_t& o) const { return !(*this == o); } + + private: + Iter it; +}; +struct +{ + template + machine_index_t> + operator () (Iter&& it) const + { return machine_index_t> (hb_iter (it)); } +} +HB_FUNCOBJ (machine_index); + + +#endif /* HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH */ diff --git a/src/hb-ot-shape-complex-use-machine.hh b/src/hb-ot-shape-complex-use-machine.hh index 462342c61..768fb7528 100644 --- a/src/hb-ot-shape-complex-use-machine.hh +++ b/src/hb-ot-shape-complex-use-machine.hh @@ -32,46 +32,44 @@ #define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH #include "hb.hh" +#include "hb-ot-shape-complex-machine-index.hh" -#line 38 "hb-ot-shape-complex-use-machine.hh" +#line 39 "hb-ot-shape-complex-use-machine.hh" static const unsigned char _use_syllable_machine_trans_keys[] = { - 12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 21u, 21u, 11u, 48u, - 11u, 48u, 1u, 15u, 1u, 1u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, - 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u, + 12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 11u, 48u, 11u, 48u, + 1u, 15u, 1u, 1u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u, + 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u, 23u, 48u, 23u, 48u, + 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 13u, 13u, + 4u, 4u, 11u, 48u, 41u, 42u, 42u, 42u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, + 25u, 47u, 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, - 1u, 48u, 11u, 48u, 13u, 21u, 4u, 4u, 13u, 13u, 11u, 48u, 11u, 48u, 41u, 42u, - 42u, 42u, 11u, 48u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, - 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u, - 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 1u, 15u, - 4u, 4u, 13u, 21u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, 42u, 42u, - 21u, 42u, 1u, 5u, 0 + 1u, 48u, 1u, 15u, 4u, 4u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, + 42u, 42u, 1u, 5u, 0 }; static const char _use_syllable_machine_key_spans[] = { - 37, 15, 1, 37, 1, 49, 1, 38, - 38, 15, 1, 38, 27, 26, 24, 23, - 22, 2, 1, 25, 25, 25, 1, 25, + 37, 15, 1, 37, 1, 49, 38, 38, + 15, 1, 27, 26, 24, 23, 22, 2, + 1, 25, 25, 25, 1, 25, 26, 26, + 26, 27, 27, 27, 27, 38, 48, 1, + 1, 38, 2, 1, 38, 27, 26, 24, + 23, 22, 2, 1, 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 27, 38, - 48, 38, 9, 1, 1, 38, 38, 2, - 1, 38, 38, 27, 26, 24, 23, 22, - 2, 1, 25, 25, 25, 25, 26, 26, - 26, 27, 27, 27, 27, 38, 48, 15, - 1, 9, 1, 37, 48, 38, 2, 1, - 22, 5 + 48, 15, 1, 1, 37, 48, 38, 2, + 1, 5 }; static const short _use_syllable_machine_index_offsets[] = { - 0, 38, 54, 56, 94, 96, 146, 148, - 187, 226, 242, 244, 283, 311, 338, 363, - 387, 410, 413, 415, 441, 467, 493, 495, - 521, 548, 575, 602, 630, 658, 686, 714, - 753, 802, 841, 851, 853, 855, 894, 933, - 936, 938, 977, 1016, 1044, 1071, 1096, 1120, - 1143, 1146, 1148, 1174, 1200, 1226, 1252, 1279, - 1306, 1333, 1361, 1389, 1417, 1445, 1484, 1533, - 1549, 1551, 1561, 1563, 1601, 1650, 1689, 1692, - 1694, 1717 + 0, 38, 54, 56, 94, 96, 146, 185, + 224, 240, 242, 270, 297, 322, 346, 369, + 372, 374, 400, 426, 452, 454, 480, 507, + 534, 561, 589, 617, 645, 673, 712, 761, + 763, 765, 804, 807, 809, 848, 876, 903, + 928, 952, 975, 978, 980, 1006, 1032, 1058, + 1084, 1111, 1138, 1165, 1193, 1221, 1249, 1277, + 1316, 1365, 1381, 1383, 1385, 1423, 1472, 1511, + 1514, 1516 }; static const char _use_syllable_machine_indicies[] = { @@ -93,234 +91,205 @@ static const char _use_syllable_machine_indicies[] = { 20, 21, 22, 23, 24, 18, 25, 26, 27, 28, 29, 30, 10, 31, 32, 33, 10, 34, 35, 36, 37, 38, 39, 40, - 13, 10, 42, 41, 44, 1, 43, 43, - 45, 43, 43, 43, 43, 43, 46, 47, - 48, 49, 50, 51, 52, 53, 47, 54, - 46, 55, 56, 57, 58, 43, 59, 60, - 61, 43, 43, 43, 43, 62, 63, 64, - 65, 1, 43, 44, 1, 43, 43, 45, - 43, 43, 43, 43, 43, 66, 47, 48, - 49, 50, 51, 52, 53, 47, 54, 55, - 55, 56, 57, 58, 43, 59, 60, 61, - 43, 43, 43, 43, 62, 63, 64, 65, - 1, 43, 44, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 67, - 68, 67, 44, 67, 44, 1, 43, 43, - 45, 43, 43, 43, 43, 43, 43, 47, - 48, 49, 50, 51, 52, 53, 47, 54, - 55, 55, 56, 57, 58, 43, 59, 60, - 61, 43, 43, 43, 43, 62, 63, 64, - 65, 1, 43, 47, 48, 49, 50, 51, - 43, 43, 43, 43, 43, 43, 56, 57, - 58, 43, 59, 60, 61, 43, 43, 43, - 43, 48, 63, 64, 65, 69, 43, 48, - 49, 50, 51, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 59, 60, 61, - 43, 43, 43, 43, 43, 63, 64, 65, - 69, 43, 49, 50, 51, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 63, - 64, 65, 43, 50, 51, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 63, - 64, 65, 43, 51, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 63, 64, - 65, 43, 63, 64, 43, 64, 43, 49, - 50, 51, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 59, 60, 61, 43, - 43, 43, 43, 43, 63, 64, 65, 69, - 43, 49, 50, 51, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 60, - 61, 43, 43, 43, 43, 43, 63, 64, - 65, 69, 43, 49, 50, 51, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 61, 43, 43, 43, 43, 43, - 63, 64, 65, 69, 43, 71, 70, 49, - 50, 51, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 43, 63, 64, 65, 69, - 43, 48, 49, 50, 51, 43, 43, 43, - 43, 43, 43, 56, 57, 58, 43, 59, - 60, 61, 43, 43, 43, 43, 48, 63, - 64, 65, 69, 43, 48, 49, 50, 51, - 43, 43, 43, 43, 43, 43, 43, 57, - 58, 43, 59, 60, 61, 43, 43, 43, - 43, 48, 63, 64, 65, 69, 43, 48, - 49, 50, 51, 43, 43, 43, 43, 43, - 43, 43, 43, 58, 43, 59, 60, 61, - 43, 43, 43, 43, 48, 63, 64, 65, - 69, 43, 47, 48, 49, 50, 51, 43, - 53, 47, 43, 43, 43, 56, 57, 58, - 43, 59, 60, 61, 43, 43, 43, 43, - 48, 63, 64, 65, 69, 43, 47, 48, - 49, 50, 51, 43, 72, 47, 43, 43, - 43, 56, 57, 58, 43, 59, 60, 61, - 43, 43, 43, 43, 48, 63, 64, 65, - 69, 43, 47, 48, 49, 50, 51, 43, - 43, 47, 43, 43, 43, 56, 57, 58, - 43, 59, 60, 61, 43, 43, 43, 43, - 48, 63, 64, 65, 69, 43, 47, 48, - 49, 50, 51, 52, 53, 47, 43, 43, - 43, 56, 57, 58, 43, 59, 60, 61, - 43, 43, 43, 43, 48, 63, 64, 65, - 69, 43, 44, 1, 43, 43, 45, 43, - 43, 43, 43, 43, 43, 47, 48, 49, - 50, 51, 52, 53, 47, 54, 43, 55, - 56, 57, 58, 43, 59, 60, 61, 43, - 43, 43, 43, 62, 63, 64, 65, 1, - 43, 44, 67, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 67, 67, 68, - 67, 67, 67, 67, 67, 67, 67, 48, - 49, 50, 51, 67, 67, 67, 67, 67, - 67, 67, 67, 67, 67, 59, 60, 61, - 67, 67, 67, 67, 67, 63, 64, 65, - 69, 67, 44, 1, 43, 43, 45, 43, - 43, 43, 43, 43, 43, 47, 48, 49, - 50, 51, 52, 53, 47, 54, 46, 55, - 56, 57, 58, 43, 59, 60, 61, 43, - 43, 43, 43, 62, 63, 64, 65, 1, - 43, 74, 73, 73, 73, 73, 73, 73, - 73, 75, 73, 11, 76, 74, 73, 44, - 1, 43, 43, 45, 43, 43, 43, 43, - 43, 77, 47, 48, 49, 50, 51, 52, - 53, 47, 54, 46, 55, 56, 57, 58, - 43, 59, 60, 61, 43, 78, 79, 43, - 62, 63, 64, 65, 1, 43, 44, 1, - 43, 43, 45, 43, 43, 43, 43, 43, - 43, 47, 48, 49, 50, 51, 52, 53, - 47, 54, 46, 55, 56, 57, 58, 43, - 59, 60, 61, 43, 78, 79, 43, 62, - 63, 64, 65, 1, 43, 78, 79, 80, - 79, 80, 3, 6, 81, 81, 82, 81, - 81, 81, 81, 81, 83, 18, 19, 20, - 21, 22, 23, 24, 18, 25, 27, 27, - 28, 29, 30, 81, 31, 32, 33, 81, - 81, 81, 81, 37, 38, 39, 40, 6, - 81, 3, 6, 81, 81, 82, 81, 81, - 81, 81, 81, 81, 18, 19, 20, 21, + 13, 10, 42, 1, 41, 41, 43, 41, + 41, 41, 41, 41, 41, 44, 45, 46, + 47, 48, 49, 50, 44, 51, 9, 52, + 53, 54, 55, 41, 56, 57, 58, 41, + 41, 41, 41, 59, 60, 61, 62, 1, + 41, 42, 1, 41, 41, 43, 41, 41, + 41, 41, 41, 41, 44, 45, 46, 47, + 48, 49, 50, 44, 51, 52, 52, 53, + 54, 55, 41, 56, 57, 58, 41, 41, + 41, 41, 59, 60, 61, 62, 1, 41, + 42, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 64, 63, + 42, 63, 44, 45, 46, 47, 48, 41, + 41, 41, 41, 41, 41, 53, 54, 55, + 41, 56, 57, 58, 41, 41, 41, 41, + 45, 60, 61, 62, 65, 41, 45, 46, + 47, 48, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 56, 57, 58, 41, + 41, 41, 41, 41, 60, 61, 62, 65, + 41, 46, 47, 48, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 60, 61, + 62, 41, 47, 48, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 60, 61, + 62, 41, 48, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 60, 61, 62, + 41, 60, 61, 41, 61, 41, 46, 47, + 48, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 56, 57, 58, 41, 41, + 41, 41, 41, 60, 61, 62, 65, 41, + 46, 47, 48, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 57, 58, + 41, 41, 41, 41, 41, 60, 61, 62, + 65, 41, 46, 47, 48, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, + 41, 58, 41, 41, 41, 41, 41, 60, + 61, 62, 65, 41, 67, 66, 46, 47, + 48, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 60, 61, 62, 65, 41, + 45, 46, 47, 48, 41, 41, 41, 41, + 41, 41, 53, 54, 55, 41, 56, 57, + 58, 41, 41, 41, 41, 45, 60, 61, + 62, 65, 41, 45, 46, 47, 48, 41, + 41, 41, 41, 41, 41, 41, 54, 55, + 41, 56, 57, 58, 41, 41, 41, 41, + 45, 60, 61, 62, 65, 41, 45, 46, + 47, 48, 41, 41, 41, 41, 41, 41, + 41, 41, 55, 41, 56, 57, 58, 41, + 41, 41, 41, 45, 60, 61, 62, 65, + 41, 44, 45, 46, 47, 48, 41, 50, + 44, 41, 41, 41, 53, 54, 55, 41, + 56, 57, 58, 41, 41, 41, 41, 45, + 60, 61, 62, 65, 41, 44, 45, 46, + 47, 48, 41, 68, 44, 41, 41, 41, + 53, 54, 55, 41, 56, 57, 58, 41, + 41, 41, 41, 45, 60, 61, 62, 65, + 41, 44, 45, 46, 47, 48, 41, 41, + 44, 41, 41, 41, 53, 54, 55, 41, + 56, 57, 58, 41, 41, 41, 41, 45, + 60, 61, 62, 65, 41, 44, 45, 46, + 47, 48, 49, 50, 44, 41, 41, 41, + 53, 54, 55, 41, 56, 57, 58, 41, + 41, 41, 41, 45, 60, 61, 62, 65, + 41, 42, 1, 41, 41, 43, 41, 41, + 41, 41, 41, 41, 44, 45, 46, 47, + 48, 49, 50, 44, 51, 41, 52, 53, + 54, 55, 41, 56, 57, 58, 41, 41, + 41, 41, 59, 60, 61, 62, 1, 41, + 42, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 64, 63, + 63, 63, 63, 63, 63, 63, 45, 46, + 47, 48, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 56, 57, 58, 63, + 63, 63, 63, 63, 60, 61, 62, 65, + 63, 70, 69, 11, 71, 42, 1, 41, + 41, 43, 41, 41, 41, 41, 41, 41, + 44, 45, 46, 47, 48, 49, 50, 44, + 51, 9, 52, 53, 54, 55, 41, 56, + 57, 58, 41, 17, 72, 41, 59, 60, + 61, 62, 1, 41, 17, 72, 73, 72, + 73, 3, 6, 74, 74, 75, 74, 74, + 74, 74, 74, 74, 18, 19, 20, 21, 22, 23, 24, 18, 25, 27, 27, 28, - 29, 30, 81, 31, 32, 33, 81, 81, - 81, 81, 37, 38, 39, 40, 6, 81, - 18, 19, 20, 21, 22, 81, 81, 81, - 81, 81, 81, 28, 29, 30, 81, 31, - 32, 33, 81, 81, 81, 81, 19, 38, - 39, 40, 84, 81, 19, 20, 21, 22, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 31, 32, 33, 81, 81, 81, - 81, 81, 38, 39, 40, 84, 81, 20, - 21, 22, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 38, 39, 40, 81, - 21, 22, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 38, 39, 40, 81, - 22, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 38, 39, 40, 81, 38, - 39, 81, 39, 81, 20, 21, 22, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 31, 32, 33, 81, 81, 81, 81, - 81, 38, 39, 40, 84, 81, 20, 21, - 22, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 32, 33, 81, 81, - 81, 81, 81, 38, 39, 40, 84, 81, - 20, 21, 22, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 33, - 81, 81, 81, 81, 81, 38, 39, 40, - 84, 81, 20, 21, 22, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 38, - 39, 40, 84, 81, 19, 20, 21, 22, - 81, 81, 81, 81, 81, 81, 28, 29, - 30, 81, 31, 32, 33, 81, 81, 81, - 81, 19, 38, 39, 40, 84, 81, 19, - 20, 21, 22, 81, 81, 81, 81, 81, - 81, 81, 29, 30, 81, 31, 32, 33, - 81, 81, 81, 81, 19, 38, 39, 40, - 84, 81, 19, 20, 21, 22, 81, 81, - 81, 81, 81, 81, 81, 81, 30, 81, - 31, 32, 33, 81, 81, 81, 81, 19, - 38, 39, 40, 84, 81, 18, 19, 20, - 21, 22, 81, 24, 18, 81, 81, 81, - 28, 29, 30, 81, 31, 32, 33, 81, - 81, 81, 81, 19, 38, 39, 40, 84, - 81, 18, 19, 20, 21, 22, 81, 85, - 18, 81, 81, 81, 28, 29, 30, 81, - 31, 32, 33, 81, 81, 81, 81, 19, - 38, 39, 40, 84, 81, 18, 19, 20, - 21, 22, 81, 81, 18, 81, 81, 81, - 28, 29, 30, 81, 31, 32, 33, 81, - 81, 81, 81, 19, 38, 39, 40, 84, - 81, 18, 19, 20, 21, 22, 23, 24, - 18, 81, 81, 81, 28, 29, 30, 81, - 31, 32, 33, 81, 81, 81, 81, 19, - 38, 39, 40, 84, 81, 3, 6, 81, - 81, 82, 81, 81, 81, 81, 81, 81, + 29, 30, 74, 31, 32, 33, 74, 74, + 74, 74, 37, 38, 39, 40, 6, 74, + 18, 19, 20, 21, 22, 74, 74, 74, + 74, 74, 74, 28, 29, 30, 74, 31, + 32, 33, 74, 74, 74, 74, 19, 38, + 39, 40, 76, 74, 19, 20, 21, 22, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 31, 32, 33, 74, 74, 74, + 74, 74, 38, 39, 40, 76, 74, 20, + 21, 22, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 38, 39, 40, 74, + 21, 22, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 38, 39, 40, 74, + 22, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 38, 39, 40, 74, 38, + 39, 74, 39, 74, 20, 21, 22, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 31, 32, 33, 74, 74, 74, 74, + 74, 38, 39, 40, 76, 74, 20, 21, + 22, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 32, 33, 74, 74, + 74, 74, 74, 38, 39, 40, 76, 74, + 20, 21, 22, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 33, + 74, 74, 74, 74, 74, 38, 39, 40, + 76, 74, 20, 21, 22, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 38, + 39, 40, 76, 74, 19, 20, 21, 22, + 74, 74, 74, 74, 74, 74, 28, 29, + 30, 74, 31, 32, 33, 74, 74, 74, + 74, 19, 38, 39, 40, 76, 74, 19, + 20, 21, 22, 74, 74, 74, 74, 74, + 74, 74, 29, 30, 74, 31, 32, 33, + 74, 74, 74, 74, 19, 38, 39, 40, + 76, 74, 19, 20, 21, 22, 74, 74, + 74, 74, 74, 74, 74, 74, 30, 74, + 31, 32, 33, 74, 74, 74, 74, 19, + 38, 39, 40, 76, 74, 18, 19, 20, + 21, 22, 74, 24, 18, 74, 74, 74, + 28, 29, 30, 74, 31, 32, 33, 74, + 74, 74, 74, 19, 38, 39, 40, 76, + 74, 18, 19, 20, 21, 22, 74, 77, + 18, 74, 74, 74, 28, 29, 30, 74, + 31, 32, 33, 74, 74, 74, 74, 19, + 38, 39, 40, 76, 74, 18, 19, 20, + 21, 22, 74, 74, 18, 74, 74, 74, + 28, 29, 30, 74, 31, 32, 33, 74, + 74, 74, 74, 19, 38, 39, 40, 76, + 74, 18, 19, 20, 21, 22, 23, 24, + 18, 74, 74, 74, 28, 29, 30, 74, + 31, 32, 33, 74, 74, 74, 74, 19, + 38, 39, 40, 76, 74, 3, 6, 74, + 74, 75, 74, 74, 74, 74, 74, 74, 18, 19, 20, 21, 22, 23, 24, 18, - 25, 81, 27, 28, 29, 30, 81, 31, - 32, 33, 81, 81, 81, 81, 37, 38, - 39, 40, 6, 81, 3, 81, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 4, 81, 81, 81, 81, 81, - 81, 81, 19, 20, 21, 22, 81, 81, - 81, 81, 81, 81, 81, 81, 81, 81, - 31, 32, 33, 81, 81, 81, 81, 81, - 38, 39, 40, 84, 81, 3, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 4, 86, 87, 81, 14, - 81, 81, 81, 81, 81, 81, 81, 88, - 81, 14, 81, 6, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 6, 86, 86, 86, 6, - 86, 9, 81, 81, 81, 9, 81, 81, - 81, 81, 81, 3, 6, 14, 81, 82, - 81, 81, 81, 81, 81, 81, 18, 19, - 20, 21, 22, 23, 24, 18, 25, 26, - 27, 28, 29, 30, 81, 31, 32, 33, - 81, 34, 35, 81, 37, 38, 39, 40, - 6, 81, 3, 6, 81, 81, 82, 81, - 81, 81, 81, 81, 81, 18, 19, 20, - 21, 22, 23, 24, 18, 25, 26, 27, - 28, 29, 30, 81, 31, 32, 33, 81, - 81, 81, 81, 37, 38, 39, 40, 6, - 81, 34, 35, 81, 35, 81, 78, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 78, 79, 80, 9, 86, 86, - 86, 9, 86, 0 + 25, 74, 27, 28, 29, 30, 74, 31, + 32, 33, 74, 74, 74, 74, 37, 38, + 39, 40, 6, 74, 3, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 4, 74, 74, 74, 74, 74, + 74, 74, 19, 20, 21, 22, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, + 31, 32, 33, 74, 74, 74, 74, 74, + 38, 39, 40, 76, 74, 3, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 4, 78, 79, 74, 14, + 74, 6, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, + 78, 6, 78, 78, 78, 6, 78, 9, + 74, 74, 74, 9, 74, 74, 74, 74, + 74, 3, 6, 14, 74, 75, 74, 74, + 74, 74, 74, 74, 18, 19, 20, 21, + 22, 23, 24, 18, 25, 26, 27, 28, + 29, 30, 74, 31, 32, 33, 74, 34, + 35, 74, 37, 38, 39, 40, 6, 74, + 3, 6, 74, 74, 75, 74, 74, 74, + 74, 74, 74, 18, 19, 20, 21, 22, + 23, 24, 18, 25, 26, 27, 28, 29, + 30, 74, 31, 32, 33, 74, 74, 74, + 74, 37, 38, 39, 40, 6, 74, 34, + 35, 74, 35, 74, 9, 78, 78, 78, + 9, 78, 0 }; static const char _use_syllable_machine_trans_targs[] = { - 5, 9, 5, 41, 2, 5, 1, 53, - 6, 7, 5, 34, 37, 63, 64, 67, - 68, 72, 43, 44, 45, 46, 47, 57, - 58, 60, 69, 61, 54, 55, 56, 50, - 51, 52, 70, 71, 73, 62, 48, 49, - 5, 5, 5, 5, 8, 0, 33, 12, - 13, 14, 15, 16, 27, 28, 30, 31, - 24, 25, 26, 19, 20, 21, 32, 17, - 18, 5, 11, 5, 10, 22, 5, 23, - 29, 5, 35, 36, 5, 38, 39, 40, - 5, 5, 3, 42, 4, 59, 5, 65, - 66 + 5, 8, 5, 36, 2, 5, 1, 47, + 5, 6, 5, 31, 33, 57, 58, 60, + 61, 34, 37, 38, 39, 40, 41, 51, + 52, 54, 62, 55, 48, 49, 50, 44, + 45, 46, 63, 64, 65, 56, 42, 43, + 5, 5, 7, 0, 10, 11, 12, 13, + 14, 25, 26, 28, 29, 22, 23, 24, + 17, 18, 19, 30, 15, 16, 5, 5, + 9, 20, 5, 21, 27, 5, 32, 5, + 35, 5, 5, 3, 4, 53, 5, 59 }; static const char _use_syllable_machine_trans_actions[] = { 1, 0, 2, 3, 0, 4, 0, 5, - 0, 5, 8, 0, 5, 9, 0, 9, + 8, 5, 9, 0, 5, 10, 0, 10, 3, 0, 5, 5, 0, 0, 0, 5, 5, 5, 3, 3, 5, 5, 5, 5, 5, 5, 0, 0, 0, 3, 0, 0, - 10, 11, 12, 13, 5, 0, 5, 0, - 0, 0, 0, 0, 0, 0, 0, 5, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 14, 5, 15, 0, 0, 16, 0, - 0, 17, 0, 0, 18, 5, 0, 0, - 19, 20, 0, 3, 0, 5, 21, 0, - 0 + 11, 12, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 13, 14, + 0, 0, 15, 0, 0, 16, 0, 17, + 0, 18, 19, 0, 0, 5, 20, 0 }; static const char _use_syllable_machine_to_state_actions[] = { @@ -332,7 +301,6 @@ static const char _use_syllable_machine_to_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -345,21 +313,19 @@ static const char _use_syllable_machine_from_state_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const short _use_syllable_machine_eof_trans[] = { - 1, 3, 3, 6, 6, 0, 42, 44, - 44, 68, 68, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 71, 44, - 44, 44, 44, 44, 44, 44, 44, 44, - 68, 44, 74, 77, 74, 44, 44, 81, - 81, 82, 82, 82, 82, 82, 82, 82, - 82, 82, 82, 82, 82, 82, 82, 82, - 82, 82, 82, 82, 82, 82, 82, 87, - 82, 82, 82, 87, 82, 82, 82, 82, - 81, 87 + 1, 3, 3, 6, 6, 0, 42, 42, + 64, 64, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 67, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 64, 70, + 72, 42, 74, 74, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, + 75, 79, 75, 75, 79, 75, 75, 75, + 75, 79 }; static const int use_syllable_machine_start = 5; @@ -369,30 +335,53 @@ static const int use_syllable_machine_error = -1; static const int use_syllable_machine_en_main = 5; -#line 38 "hb-ot-shape-complex-use-machine.rl" +#line 39 "hb-ot-shape-complex-use-machine.rl" -#line 162 "hb-ot-shape-complex-use-machine.rl" +#line 161 "hb-ot-shape-complex-use-machine.rl" #define found_syllable(syllable_type) \ HB_STMT_START { \ - if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ - for (unsigned int i = ts; i < te; i++) \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \ + for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \ info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \ syllable_serial++; \ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ } HB_STMT_END +static bool +not_standard_default_ignorable (const hb_glyph_info_t &i) +{ return !((i.use_category() == USE_O || i.use_category() == USE_Rsv) && _hb_glyph_info_is_default_ignorable (&i)); } + static void find_syllables_use (hb_buffer_t *buffer) { - unsigned int p, pe, eof, ts, te, act; - int cs; hb_glyph_info_t *info = buffer->info; + auto p = + + hb_iter (info, buffer->len) + | hb_enumerate + | hb_filter (not_standard_default_ignorable, hb_second) + | hb_filter ([&] (const hb_pair_t p) + { + if (p.second.use_category() == USE_ZWNJ) + for (unsigned i = p.first + 1; i < buffer->len; ++i) + if (not_standard_default_ignorable (info[i])) + return !_hb_glyph_info_is_unicode_mark (&info[i]); + return true; + }) + | hb_enumerate + | machine_index + ; + auto pe = p + p.len (); + auto eof = +pe; + auto ts = +p; + auto te = +p; + unsigned int act; + int cs; -#line 396 "hb-ot-shape-complex-use-machine.hh" +#line 385 "hb-ot-shape-complex-use-machine.hh" { cs = use_syllable_machine_start; ts = 0; @@ -400,15 +389,12 @@ find_syllables_use (hb_buffer_t *buffer) act = 0; } -#line 182 "hb-ot-shape-complex-use-machine.rl" +#line 204 "hb-ot-shape-complex-use-machine.rl" - p = 0; - pe = eof = buffer->len; - unsigned int syllable_serial = 1; -#line 412 "hb-ot-shape-complex-use-machine.hh" +#line 398 "hb-ot-shape-complex-use-machine.hh" { int _slen; int _trans; @@ -422,16 +408,16 @@ _resume: #line 1 "NONE" {ts = p;} break; -#line 426 "hb-ot-shape-complex-use-machine.hh" +#line 412 "hb-ot-shape-complex-use-machine.hh" } _keys = _use_syllable_machine_trans_keys + (cs<<1); _inds = _use_syllable_machine_indicies + _use_syllable_machine_index_offsets[cs]; _slen = _use_syllable_machine_key_spans[cs]; - _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].use_category()) && - ( info[p].use_category()) <= _keys[1] ? - ( info[p].use_category()) - _keys[0] : _slen ]; + _trans = _inds[ _slen > 0 && _keys[0] <=( (*p).second.second.use_category()) && + ( (*p).second.second.use_category()) <= _keys[1] ? + ( (*p).second.second.use_category()) - _keys[0] : _slen ]; _eof_trans: cs = _use_syllable_machine_trans_targs[_trans]; @@ -444,64 +430,60 @@ _eof_trans: #line 1 "NONE" {te = p+1;} break; - case 12: -#line 150 "hb-ot-shape-complex-use-machine.rl" + case 8: +#line 149 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (independent_cluster); }} break; - case 14: -#line 153 "hb-ot-shape-complex-use-machine.rl" + case 13: +#line 152 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (standard_cluster); }} break; - case 10: -#line 157 "hb-ot-shape-complex-use-machine.rl" + case 11: +#line 156 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (broken_cluster); }} break; - case 8: -#line 158 "hb-ot-shape-complex-use-machine.rl" + case 9: +#line 157 "hb-ot-shape-complex-use-machine.rl" {te = p+1;{ found_syllable (non_cluster); }} break; - case 11: + case 14: #line 150 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (independent_cluster); }} + {te = p;p--;{ found_syllable (virama_terminated_cluster); }} break; case 15: #line 151 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (virama_terminated_cluster); }} - break; - case 16: -#line 152 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (sakot_terminated_cluster); }} break; - case 13: -#line 153 "hb-ot-shape-complex-use-machine.rl" + case 12: +#line 152 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (standard_cluster); }} break; - case 18: -#line 154 "hb-ot-shape-complex-use-machine.rl" + case 17: +#line 153 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }} break; - case 17: -#line 155 "hb-ot-shape-complex-use-machine.rl" + case 16: +#line 154 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (numeral_cluster); }} break; + case 18: +#line 155 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (symbol_cluster); }} + break; case 19: #line 156 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (symbol_cluster); }} + {te = p;p--;{ found_syllable (broken_cluster); }} break; case 20: #line 157 "hb-ot-shape-complex-use-machine.rl" - {te = p;p--;{ found_syllable (broken_cluster); }} - break; - case 21: -#line 158 "hb-ot-shape-complex-use-machine.rl" {te = p;p--;{ found_syllable (non_cluster); }} break; case 1: -#line 153 "hb-ot-shape-complex-use-machine.rl" +#line 152 "hb-ot-shape-complex-use-machine.rl" {{p = ((te))-1;}{ found_syllable (standard_cluster); }} break; case 4: -#line 157 "hb-ot-shape-complex-use-machine.rl" +#line 156 "hb-ot-shape-complex-use-machine.rl" {{p = ((te))-1;}{ found_syllable (broken_cluster); }} break; case 2: @@ -519,16 +501,16 @@ _eof_trans: case 3: #line 1 "NONE" {te = p+1;} -#line 157 "hb-ot-shape-complex-use-machine.rl" +#line 156 "hb-ot-shape-complex-use-machine.rl" {act = 8;} break; - case 9: + case 10: #line 1 "NONE" {te = p+1;} -#line 158 "hb-ot-shape-complex-use-machine.rl" +#line 157 "hb-ot-shape-complex-use-machine.rl" {act = 9;} break; -#line 532 "hb-ot-shape-complex-use-machine.hh" +#line 514 "hb-ot-shape-complex-use-machine.hh" } _again: @@ -537,7 +519,7 @@ _again: #line 1 "NONE" {ts = 0;} break; -#line 541 "hb-ot-shape-complex-use-machine.hh" +#line 523 "hb-ot-shape-complex-use-machine.hh" } if ( ++p != pe ) @@ -553,7 +535,7 @@ _again: } -#line 190 "hb-ot-shape-complex-use-machine.rl" +#line 209 "hb-ot-shape-complex-use-machine.rl" } diff --git a/src/hb-ot-shape-complex-use-machine.rl b/src/hb-ot-shape-complex-use-machine.rl index 9b75b5c6e..e04459ed5 100644 --- a/src/hb-ot-shape-complex-use-machine.rl +++ b/src/hb-ot-shape-complex-use-machine.rl @@ -30,6 +30,7 @@ #define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH #include "hb.hh" +#include "hb-ot-shape-complex-machine-index.hh" %%{ machine use_syllable_machine; @@ -47,7 +48,6 @@ B = 1; # BASE IND = 3; # BASE_IND N = 4; # BASE_NUM GB = 5; # BASE_OTHER -CGJ = 6; # CGJ #F = 7; # CONS_FINAL #FM = 8; # CONS_FINAL_MOD #M = 9; # CONS_MED @@ -63,7 +63,6 @@ Rsv = 17; # Reserved characters R = 18; # REPHA S = 19; # SYM #SM = 20; # SYM_MOD -VS = 21; # VARIATION_SELECTOR #V = 36; # VOWEL #VM = 40; # VOWEL_MOD CS = 43; # CONS_WITH_STACKER @@ -96,7 +95,7 @@ FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable h = H | HVM | Sk; # Override: Adhoc ZWJ placement. https://github.com/harfbuzz/harfbuzz/issues/542#issuecomment-353169729 -consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) VS? CMAbv? CMBlw*)*; +consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) CMAbv? CMBlw*)*; # Override: Allow two MBlw. https://github.com/harfbuzz/harfbuzz/issues/376 medial_consonants = MPre? MAbv? MBlw?.MBlw? MPst?; dependent_vowels = VPre* VAbv* VBlw* VPst*; @@ -104,7 +103,7 @@ vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*; final_consonants = FAbv* FBlw* FPst*; final_modifiers = FMAbv* FMBlw* | FMPst?; -complex_syllable_start = (R | CS)? (B | GB) VS?; +complex_syllable_start = (R | CS)? (B | GB); complex_syllable_middle = consonant_modifiers medial_consonants @@ -117,8 +116,8 @@ complex_syllable_tail = final_consonants final_modifiers ; -number_joiner_terminated_cluster_tail = (HN N VS?)* HN; -numeral_cluster_tail = (HN N VS?)+; +number_joiner_terminated_cluster_tail = (HN N)* HN; +numeral_cluster_tail = (HN N)+; symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; virama_terminated_cluster = @@ -140,10 +139,10 @@ broken_cluster = (complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail) ; -number_joiner_terminated_cluster = N VS? number_joiner_terminated_cluster_tail; -numeral_cluster = N VS? numeral_cluster_tail?; -symbol_cluster = (S | GB) VS? symbol_cluster_tail?; -independent_cluster = (IND | O | Rsv | WJ) VS?; +number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; +numeral_cluster = N numeral_cluster_tail?; +symbol_cluster = (S | GB) symbol_cluster_tail?; +independent_cluster = (IND | O | Rsv | WJ); other = any; main := |* @@ -163,27 +162,47 @@ main := |* #define found_syllable(syllable_type) \ HB_STMT_START { \ - if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ - for (unsigned int i = ts; i < te; i++) \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \ + for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \ info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \ syllable_serial++; \ if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ } HB_STMT_END +static bool +not_standard_default_ignorable (const hb_glyph_info_t &i) +{ return !((i.use_category() == USE_O || i.use_category() == USE_Rsv) && _hb_glyph_info_is_default_ignorable (&i)); } + static void find_syllables_use (hb_buffer_t *buffer) { - unsigned int p, pe, eof, ts, te, act; - int cs; hb_glyph_info_t *info = buffer->info; + auto p = + + hb_iter (info, buffer->len) + | hb_enumerate + | hb_filter (not_standard_default_ignorable, hb_second) + | hb_filter ([&] (const hb_pair_t p) + { + if (p.second.use_category() == USE_ZWNJ) + for (unsigned i = p.first + 1; i < buffer->len; ++i) + if (not_standard_default_ignorable (info[i])) + return !_hb_glyph_info_is_unicode_mark (&info[i]); + return true; + }) + | hb_enumerate + | machine_index + ; + auto pe = p + p.len (); + auto eof = +pe; + auto ts = +p; + auto te = +p; + unsigned int act; + int cs; %%{ write init; - getkey info[p].use_category(); + getkey (*p).second.second.use_category(); }%% - p = 0; - pe = eof = buffer->len; - unsigned int syllable_serial = 1; %%{ write exec; diff --git a/src/hb-ot-shape-complex-use-table.cc b/src/hb-ot-shape-complex-use-table.cc index aa9c35086..7fc735ab3 100644 --- a/src/hb-ot-shape-complex-use-table.cc +++ b/src/hb-ot-shape-complex-use-table.cc @@ -24,7 +24,6 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-macros" #define B USE_B /* BASE */ -#define CGJ USE_CGJ /* CGJ */ #define CS USE_CS /* CONS_WITH_STACKER */ #define GB USE_GB /* BASE_OTHER */ #define H USE_H /* HALANT */ @@ -38,7 +37,6 @@ #define S USE_S /* SYM */ #define SUB USE_SUB /* CONS_SUB */ #define Sk USE_Sk /* SAKOT */ -#define VS USE_VS /* VARIATION_SELECTOR */ #define WJ USE_WJ /* Word_Joiner */ #define ZWJ USE_ZWJ /* ZWJ */ #define ZWNJ USE_ZWNJ /* ZWNJ */ @@ -86,13 +84,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 00D0 */ O, O, O, O, O, O, O, GB, -#define use_offset_0x0348u 80 - - - /* Combining Diacritical Marks */ - O, O, O, O, O, O, O, CGJ, - -#define use_offset_0x0900u 88 +#define use_offset_0x0900u 80 /* Devanagari */ @@ -205,7 +197,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, /* 0DF0 */ O, O, VPst, VPst, O, O, O, O, -#define use_offset_0x0f18u 1360 +#define use_offset_0x0f18u 1352 /* Tibetan */ @@ -222,7 +214,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O, /* 0FC0 */ O, O, O, O, O, O, FMBlw, O, -#define use_offset_0x1000u 1536 +#define use_offset_0x1000u 1528 /* Myanmar */ @@ -238,7 +230,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst, /* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O, -#define use_offset_0x1700u 1696 +#define use_offset_0x1700u 1688 /* Tagalog */ @@ -271,7 +263,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 17D0 */ FMAbv, VAbv, H, FMAbv, O, O, O, O, O, O, O, O, B, FMAbv, O, O, /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1900u 1936 +#define use_offset_0x1900u 1928 /* Limbu */ @@ -315,7 +307,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, /* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x1b00u 2352 +#define use_offset_0x1b00u 2344 /* Balinese */ @@ -351,7 +343,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FMAbv, CMBlw, O, O, O, O, O, O, O, O, /* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B, -#define use_offset_0x1cd0u 2688 +#define use_offset_0x1cd0u 2680 /* Vedic Extensions */ @@ -360,20 +352,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O, /* 1CF0 */ O, O, IND, IND, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, GB, O, O, O, O, O, -#define use_offset_0x1df8u 2736 +#define use_offset_0x1df8u 2728 /* Combining Diacritical Marks Supplement */ O, O, O, FMAbv, O, O, O, O, -#define use_offset_0x2008u 2744 +#define use_offset_0x2008u 2736 /* General Punctuation */ O, O, O, O, ZWNJ, ZWJ, O, O, /* 2010 */ GB, GB, GB, GB, GB, O, O, O, -#define use_offset_0x2060u 2760 +#define use_offset_0x2060u 2752 /* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, @@ -382,20 +374,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 2070 */ O, O, O, O, FMPst, O, O, O, O, O, O, O, O, O, O, O, /* 2080 */ O, O, FMPst, FMPst, FMPst, O, O, O, -#define use_offset_0x20f0u 2800 +#define use_offset_0x20f0u 2792 /* Combining Diacritical Marks for Symbols */ /* 20F0 */ VMAbv, O, O, O, O, O, O, O, -#define use_offset_0x25c8u 2808 +#define use_offset_0x25c8u 2800 /* Geometric Shapes */ O, O, O, O, GB, O, O, O, -#define use_offset_0xa800u 2816 +#define use_offset_0xa800u 2808 /* Syloti Nagri */ @@ -482,7 +474,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst, /* AAF0 */ O, O, O, O, O, VMPst, H, O, -#define use_offset_0xabc0u 3576 +#define use_offset_0xabc0u 3568 /* Meetei Mayek */ @@ -492,14 +484,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O, /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0xfe00u 3640 - - - /* Variation Selectors */ - - /* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, - -#define use_offset_0x10a00u 3656 +#define use_offset_0x10a00u 3632 /* Kharoshthi */ @@ -510,7 +495,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H, /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, -#define use_offset_0x11000u 3736 +#define use_offset_0x11000u 3712 /* Brahmi */ @@ -531,7 +516,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11100u 3928 +#define use_offset_0x11100u 3904 /* Chakma */ @@ -569,7 +554,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw, /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O, -#define use_offset_0x11280u 4248 +#define use_offset_0x11280u 4224 /* Multani */ @@ -597,7 +582,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, -#define use_offset_0x11400u 4496 +#define use_offset_0x11400u 4472 /* Newa */ @@ -620,7 +605,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O, /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11580u 4720 +#define use_offset_0x11580u 4696 /* Siddham */ @@ -663,7 +648,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O, /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, -#define use_offset_0x11800u 5168 +#define use_offset_0x11800u 5144 /* Dogra */ @@ -673,7 +658,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw, /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O, -#define use_offset_0x11900u 5232 +#define use_offset_0x11900u 5208 /* Dives Akuru */ @@ -685,7 +670,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11940 */ MPst, R, MBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, /* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x119a0u 5328 +#define use_offset_0x119a0u 5304 /* Nandinagari */ @@ -713,7 +698,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O, -#define use_offset_0x11c00u 5584 +#define use_offset_0x11c00u 5560 /* Bhaiksuki */ @@ -734,7 +719,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O, -#define use_offset_0x11d00u 5768 +#define use_offset_0x11d00u 5744 /* Masaram Gondi */ @@ -754,7 +739,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O, /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, -#define use_offset_0x11ee0u 5944 +#define use_offset_0x11ee0u 5920 /* Makasar */ @@ -762,7 +747,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O, -}; /* Table items: 5968; occupancy: 74% */ +}; /* Table items: 5944; occupancy: 74% */ USE_TABLE_ELEMENT_TYPE hb_use_get_category (hb_codepoint_t u) @@ -772,7 +757,6 @@ hb_use_get_category (hb_codepoint_t u) case 0x0u: if (hb_in_range (u, 0x0028u, 0x003Fu)) return use_table[u - 0x0028u + use_offset_0x0028u]; if (hb_in_range (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u]; - if (hb_in_range (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u]; if (hb_in_range (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u]; if (hb_in_range (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u]; break; @@ -798,10 +782,6 @@ hb_use_get_category (hb_codepoint_t u) if (hb_in_range (u, 0xABC0u, 0xABFFu)) return use_table[u - 0xABC0u + use_offset_0xabc0u]; break; - case 0xFu: - if (hb_in_range (u, 0xFE00u, 0xFE0Fu)) return use_table[u - 0xFE00u + use_offset_0xfe00u]; - break; - case 0x10u: if (hb_in_range (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u]; break; @@ -827,7 +807,6 @@ hb_use_get_category (hb_codepoint_t u) } #undef B -#undef CGJ #undef CS #undef GB #undef H @@ -841,7 +820,6 @@ hb_use_get_category (hb_codepoint_t u) #undef S #undef SUB #undef Sk -#undef VS #undef WJ #undef ZWJ #undef ZWNJ diff --git a/src/hb-ot-shape-complex-use.hh b/src/hb-ot-shape-complex-use.hh index ce6645ecd..f88997f87 100644 --- a/src/hb-ot-shape-complex-use.hh +++ b/src/hb-ot-shape-complex-use.hh @@ -49,7 +49,6 @@ enum use_category_t { USE_IND = 3, /* BASE_IND */ USE_N = 4, /* BASE_NUM */ USE_GB = 5, /* BASE_OTHER */ - USE_CGJ = 6, /* CGJ */ // USE_F = 7, /* CONS_FINAL */ USE_FM = 8, /* CONS_FINAL_MOD */ // USE_M = 9, /* CONS_MED */ @@ -65,7 +64,6 @@ enum use_category_t { USE_R = 18, /* REPHA */ USE_S = 19, /* SYM */ // USE_SM = 20, /* SYM_MOD */ - USE_VS = 21, /* VARIATION_SELECTOR */ // USE_V = 36, /* VOWEL */ // USE_VM = 40, /* VOWEL_MOD */ USE_CS = 43, /* CONS_WITH_STACKER */