[use] Skip most default ignorables when clustering

This commit is contained in:
David Corbett 2019-05-03 14:16:50 -04:00 committed by Behdad Esfahbod
parent 645f4e7cdd
commit 6a38adeb57
7 changed files with 423 additions and 386 deletions

View File

@ -118,6 +118,7 @@ HB_BASE_sources = \
hb-ot-shape-complex-indic.hh \
hb-ot-shape-complex-khmer.cc \
hb-ot-shape-complex-khmer.hh \
hb-ot-shape-complex-machine-index.hh \
hb-ot-shape-complex-myanmar.cc \
hb-ot-shape-complex-myanmar.hh \
hb-ot-shape-complex-thai.cc \

View File

@ -51,7 +51,6 @@ for i, f in enumerate (files):
defaults = ('Other', 'Not_Applicable', 'Cn', 'No_Block')
# TODO Characters that are not in Unicode Indic files, but used in USE
data[0][0x034F] = defaults[0]
data[0][0x1B61] = defaults[0]
data[0][0x1B63] = defaults[0]
data[0][0x1B64] = defaults[0]
@ -72,8 +71,6 @@ data[0][0x11C44] = 'Consonant_Placeholder'
data[0][0x11C45] = 'Consonant_Placeholder'
# TODO https://github.com/harfbuzz/harfbuzz/pull/1399
data[0][0x111C8] = 'Consonant_Placeholder'
for u in range (0xFE00, 0xFE0F + 1):
data[0][u] = defaults[0]
# Merge data into one dict:
for i,v in enumerate (defaults):
@ -194,8 +191,6 @@ def is_BASE_OTHER(U, UISC, UGC):
if UISC == Consonant_Placeholder: return True #SPEC-DRAFT
#SPEC-DRAFT return U in [0x00A0, 0x00D7, 0x2015, 0x2022, 0x25CC, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
def is_CGJ(U, UISC, UGC):
return U == 0x034F
# Predicate for the 'F' entry of use_mapping (see the 'F': is_CONS_FINAL pair).
# True when the Indic syllabic category is Consonant_Final and the general
# category is not Lo, or when the syllabic category is
# Consonant_Succeeding_Repha (the general category is not consulted then).
# U is accepted for signature parity with the other predicates but is unused.
def is_CONS_FINAL(U, UISC, UGC):
return ((UISC == Consonant_Final and UGC != Lo) or
UISC == Consonant_Succeeding_Repha)
@ -234,9 +229,7 @@ def is_OTHER(U, UISC, UGC):
return (UISC == Other
and not is_SYM(U, UISC, UGC)
and not is_SYM_MOD(U, UISC, UGC)
and not is_CGJ(U, UISC, UGC)
and not is_Word_Joiner(U, UISC, UGC)
and not is_VARIATION_SELECTOR(U, UISC, UGC)
)
# True when the general category is the string 'Cn'; U and UISC are unused.
# NOTE(review): 'Cn' is also the general-category entry of `defaults`, so this
# presumably matches code points absent from the input data -- confirm.
def is_Reserved(U, UISC, UGC):
return UGC == 'Cn'
@ -250,8 +243,6 @@ def is_SYM(U, UISC, UGC):
return UGC in [So, Sc] and U not in [0x1B62, 0x1B68]
# Predicate for the 'SM' entry of use_mapping: membership test against an
# explicit list of code points (the contiguous run 0x1B6B..0x1B73).
# UISC and UGC are accepted for signature parity but are not consulted.
def is_SYM_MOD(U, UISC, UGC):
return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
def is_VARIATION_SELECTOR(U, UISC, UGC):
return 0xFE00 <= U <= 0xFE0F
def is_VOWEL(U, UISC, UGC):
# https://github.com/harfbuzz/harfbuzz/issues/376
return (UISC == Pure_Killer or
@ -261,12 +252,12 @@ def is_VOWEL_MOD(U, UISC, UGC):
return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
(UGC != Lo and (UISC == Bindu or U in [0xAA29])))
# CGJ and VS are handled in find_syllables
use_mapping = {
'B': is_BASE,
'IND': is_BASE_IND,
'N': is_BASE_NUM,
'GB': is_BASE_OTHER,
'CGJ': is_CGJ,
'F': is_CONS_FINAL,
'FM': is_CONS_FINAL_MOD,
'M': is_CONS_MED,
@ -285,7 +276,6 @@ use_mapping = {
'S': is_SYM,
'Sk': is_SAKOT,
'SM': is_SYM_MOD,
'VS': is_VARIATION_SELECTOR,
'V': is_VOWEL,
'VM': is_VOWEL_MOD,
}

View File

@ -0,0 +1,69 @@
/*
* Copyright © 2019,2020 David Corbett
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
#ifndef HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH
#define HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH
#include "hb.hh"
/*
 * machine_index_t
 *
 * Iterator adaptor that forwards every iterator primitive to the wrapped
 * iterator `it`, and additionally lets the wrapper be assigned from, and
 * compared by, the `.first` member of the wrapped iterator's current item.
 *
 * The wrapped iterator's items must therefore expose a `.first` member that
 * is usable as an unsigned position.
 * NOTE(review): the seeking assignment assumes `.first` grows by exactly one
 * per step of the wrapped iterator (as it would for an enumerated sequence)
 * -- confirm against callers.
 */
template <typename Iter>
struct machine_index_t :
  hb_iter_with_fallback_t<machine_index_t<Iter>,
                          typename Iter::item_t>
{
  /* Wrap a copy of `it`. */
  machine_index_t (const Iter& it) : it (it) {}

  /* The base is named explicitly: a user-provided copy constructor that
   * leaves its base to be default-initialized implicitly draws GCC's
   * -Wextra "base class should be explicitly initialized in the copy
   * constructor" warning. */
  machine_index_t (const machine_index_t& o) :
    hb_iter_with_fallback_t<machine_index_t<Iter>,
                            typename Iter::item_t> (),
    it (o.it) {}

  /* Iterator traits are inherited from the wrapped iterator. */
  static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator;
  static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator;

  /* Iterator primitives: each one delegates straight to `it`. */
  typename Iter::item_t __item__ () const { return *it; }
  typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; }
  unsigned __len__ () const { return it.len (); }
  void __next__ () { ++it; }
  void __forward__ (unsigned n) { it += n; }
  void __prev__ () { --it; }
  void __rewind__ (unsigned n) { it -= n; }

  /* Seek: move `it` forward or backward by the distance between the current
   * item's `.first` and `n`.  Nothing happens when they are already equal. */
  void operator = (unsigned n)
  {
    unsigned index = (*it).first;
    if (index < n)
      it += n - index;
    else if (index > n)
      it -= index - n;
  }

  /* Seek to the position `o` currently reports; `o.it` itself is not copied. */
  void operator = (const machine_index_t& o) { *this = (*o.it).first; }

  /* Two indices are equal when their current items report the same `.first`. */
  bool operator == (const machine_index_t& o) const { return (*it).first == (*o.it).first; }
  bool operator != (const machine_index_t& o) const { return !(*this == o); }

  private:
  Iter it;
};
/*
 * machine_index
 *
 * Function object that turns any iterable into a machine_index_t over that
 * iterable's iterator type.
 */
struct
{
  template <typename Iter,
            hb_requires (hb_is_iterable (Iter))>
  machine_index_t<hb_iter_type<Iter>>
  operator () (Iter&& it) const
  {
    /* Spell the result type once; it is the same type as the return type. */
    using index_t = machine_index_t<hb_iter_type<Iter>>;
    return index_t (hb_iter (it));
  }
}
HB_FUNCOBJ (machine_index);
#endif /* HB_OT_SHAPE_COMPLEX_MACHINE_INDEX_HH */

View File

@ -32,46 +32,44 @@
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#include "hb.hh"
#include "hb-ot-shape-complex-machine-index.hh"
#line 38 "hb-ot-shape-complex-use-machine.hh"
#line 39 "hb-ot-shape-complex-use-machine.hh"
static const unsigned char _use_syllable_machine_trans_keys[] = {
12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 21u, 21u, 11u, 48u,
11u, 48u, 1u, 15u, 1u, 1u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u,
26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u,
12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 11u, 48u, 11u, 48u,
1u, 15u, 1u, 1u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, 45u, 46u,
46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u, 23u, 48u, 23u, 48u,
23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 13u, 13u,
4u, 4u, 11u, 48u, 41u, 42u, 42u, 42u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u,
25u, 47u, 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u,
23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u,
1u, 48u, 11u, 48u, 13u, 21u, 4u, 4u, 13u, 13u, 11u, 48u, 11u, 48u, 41u, 42u,
42u, 42u, 11u, 48u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u,
45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u,
23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 1u, 15u,
4u, 4u, 13u, 21u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, 42u, 42u,
21u, 42u, 1u, 5u, 0
1u, 48u, 1u, 15u, 4u, 4u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u,
42u, 42u, 1u, 5u, 0
};
static const char _use_syllable_machine_key_spans[] = {
37, 15, 1, 37, 1, 49, 1, 38,
38, 15, 1, 38, 27, 26, 24, 23,
22, 2, 1, 25, 25, 25, 1, 25,
37, 15, 1, 37, 1, 49, 38, 38,
15, 1, 27, 26, 24, 23, 22, 2,
1, 25, 25, 25, 1, 25, 26, 26,
26, 27, 27, 27, 27, 38, 48, 1,
1, 38, 2, 1, 38, 27, 26, 24,
23, 22, 2, 1, 25, 25, 25, 25,
26, 26, 26, 27, 27, 27, 27, 38,
48, 38, 9, 1, 1, 38, 38, 2,
1, 38, 38, 27, 26, 24, 23, 22,
2, 1, 25, 25, 25, 25, 26, 26,
26, 27, 27, 27, 27, 38, 48, 15,
1, 9, 1, 37, 48, 38, 2, 1,
22, 5
48, 15, 1, 1, 37, 48, 38, 2,
1, 5
};
static const short _use_syllable_machine_index_offsets[] = {
0, 38, 54, 56, 94, 96, 146, 148,
187, 226, 242, 244, 283, 311, 338, 363,
387, 410, 413, 415, 441, 467, 493, 495,
521, 548, 575, 602, 630, 658, 686, 714,
753, 802, 841, 851, 853, 855, 894, 933,
936, 938, 977, 1016, 1044, 1071, 1096, 1120,
1143, 1146, 1148, 1174, 1200, 1226, 1252, 1279,
1306, 1333, 1361, 1389, 1417, 1445, 1484, 1533,
1549, 1551, 1561, 1563, 1601, 1650, 1689, 1692,
1694, 1717
0, 38, 54, 56, 94, 96, 146, 185,
224, 240, 242, 270, 297, 322, 346, 369,
372, 374, 400, 426, 452, 454, 480, 507,
534, 561, 589, 617, 645, 673, 712, 761,
763, 765, 804, 807, 809, 848, 876, 903,
928, 952, 975, 978, 980, 1006, 1032, 1058,
1084, 1111, 1138, 1165, 1193, 1221, 1249, 1277,
1316, 1365, 1381, 1383, 1385, 1423, 1472, 1511,
1514, 1516
};
static const char _use_syllable_machine_indicies[] = {
@ -93,234 +91,205 @@ static const char _use_syllable_machine_indicies[] = {
20, 21, 22, 23, 24, 18, 25, 26,
27, 28, 29, 30, 10, 31, 32, 33,
10, 34, 35, 36, 37, 38, 39, 40,
13, 10, 42, 41, 44, 1, 43, 43,
45, 43, 43, 43, 43, 43, 46, 47,
48, 49, 50, 51, 52, 53, 47, 54,
46, 55, 56, 57, 58, 43, 59, 60,
61, 43, 43, 43, 43, 62, 63, 64,
65, 1, 43, 44, 1, 43, 43, 45,
43, 43, 43, 43, 43, 66, 47, 48,
49, 50, 51, 52, 53, 47, 54, 55,
55, 56, 57, 58, 43, 59, 60, 61,
43, 43, 43, 43, 62, 63, 64, 65,
1, 43, 44, 67, 67, 67, 67, 67,
67, 67, 67, 67, 67, 67, 67, 67,
68, 67, 44, 67, 44, 1, 43, 43,
45, 43, 43, 43, 43, 43, 43, 47,
48, 49, 50, 51, 52, 53, 47, 54,
55, 55, 56, 57, 58, 43, 59, 60,
61, 43, 43, 43, 43, 62, 63, 64,
65, 1, 43, 47, 48, 49, 50, 51,
43, 43, 43, 43, 43, 43, 56, 57,
58, 43, 59, 60, 61, 43, 43, 43,
43, 48, 63, 64, 65, 69, 43, 48,
49, 50, 51, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 59, 60, 61,
43, 43, 43, 43, 43, 63, 64, 65,
69, 43, 49, 50, 51, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 63,
64, 65, 43, 50, 51, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 63,
64, 65, 43, 51, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 63, 64,
65, 43, 63, 64, 43, 64, 43, 49,
50, 51, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 59, 60, 61, 43,
43, 43, 43, 43, 63, 64, 65, 69,
43, 49, 50, 51, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 60,
61, 43, 43, 43, 43, 43, 63, 64,
65, 69, 43, 49, 50, 51, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 61, 43, 43, 43, 43, 43,
63, 64, 65, 69, 43, 71, 70, 49,
50, 51, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 63, 64, 65, 69,
43, 48, 49, 50, 51, 43, 43, 43,
43, 43, 43, 56, 57, 58, 43, 59,
60, 61, 43, 43, 43, 43, 48, 63,
64, 65, 69, 43, 48, 49, 50, 51,
43, 43, 43, 43, 43, 43, 43, 57,
58, 43, 59, 60, 61, 43, 43, 43,
43, 48, 63, 64, 65, 69, 43, 48,
49, 50, 51, 43, 43, 43, 43, 43,
43, 43, 43, 58, 43, 59, 60, 61,
43, 43, 43, 43, 48, 63, 64, 65,
69, 43, 47, 48, 49, 50, 51, 43,
53, 47, 43, 43, 43, 56, 57, 58,
43, 59, 60, 61, 43, 43, 43, 43,
48, 63, 64, 65, 69, 43, 47, 48,
49, 50, 51, 43, 72, 47, 43, 43,
43, 56, 57, 58, 43, 59, 60, 61,
43, 43, 43, 43, 48, 63, 64, 65,
69, 43, 47, 48, 49, 50, 51, 43,
43, 47, 43, 43, 43, 56, 57, 58,
43, 59, 60, 61, 43, 43, 43, 43,
48, 63, 64, 65, 69, 43, 47, 48,
49, 50, 51, 52, 53, 47, 43, 43,
43, 56, 57, 58, 43, 59, 60, 61,
43, 43, 43, 43, 48, 63, 64, 65,
69, 43, 44, 1, 43, 43, 45, 43,
43, 43, 43, 43, 43, 47, 48, 49,
50, 51, 52, 53, 47, 54, 43, 55,
56, 57, 58, 43, 59, 60, 61, 43,
43, 43, 43, 62, 63, 64, 65, 1,
43, 44, 67, 67, 67, 67, 67, 67,
67, 67, 67, 67, 67, 67, 67, 68,
67, 67, 67, 67, 67, 67, 67, 48,
49, 50, 51, 67, 67, 67, 67, 67,
67, 67, 67, 67, 67, 59, 60, 61,
67, 67, 67, 67, 67, 63, 64, 65,
69, 67, 44, 1, 43, 43, 45, 43,
43, 43, 43, 43, 43, 47, 48, 49,
50, 51, 52, 53, 47, 54, 46, 55,
56, 57, 58, 43, 59, 60, 61, 43,
43, 43, 43, 62, 63, 64, 65, 1,
43, 74, 73, 73, 73, 73, 73, 73,
73, 75, 73, 11, 76, 74, 73, 44,
1, 43, 43, 45, 43, 43, 43, 43,
43, 77, 47, 48, 49, 50, 51, 52,
53, 47, 54, 46, 55, 56, 57, 58,
43, 59, 60, 61, 43, 78, 79, 43,
62, 63, 64, 65, 1, 43, 44, 1,
43, 43, 45, 43, 43, 43, 43, 43,
43, 47, 48, 49, 50, 51, 52, 53,
47, 54, 46, 55, 56, 57, 58, 43,
59, 60, 61, 43, 78, 79, 43, 62,
63, 64, 65, 1, 43, 78, 79, 80,
79, 80, 3, 6, 81, 81, 82, 81,
81, 81, 81, 81, 83, 18, 19, 20,
21, 22, 23, 24, 18, 25, 27, 27,
28, 29, 30, 81, 31, 32, 33, 81,
81, 81, 81, 37, 38, 39, 40, 6,
81, 3, 6, 81, 81, 82, 81, 81,
81, 81, 81, 81, 18, 19, 20, 21,
13, 10, 42, 1, 41, 41, 43, 41,
41, 41, 41, 41, 41, 44, 45, 46,
47, 48, 49, 50, 44, 51, 9, 52,
53, 54, 55, 41, 56, 57, 58, 41,
41, 41, 41, 59, 60, 61, 62, 1,
41, 42, 1, 41, 41, 43, 41, 41,
41, 41, 41, 41, 44, 45, 46, 47,
48, 49, 50, 44, 51, 52, 52, 53,
54, 55, 41, 56, 57, 58, 41, 41,
41, 41, 59, 60, 61, 62, 1, 41,
42, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 64, 63,
42, 63, 44, 45, 46, 47, 48, 41,
41, 41, 41, 41, 41, 53, 54, 55,
41, 56, 57, 58, 41, 41, 41, 41,
45, 60, 61, 62, 65, 41, 45, 46,
47, 48, 41, 41, 41, 41, 41, 41,
41, 41, 41, 41, 56, 57, 58, 41,
41, 41, 41, 41, 60, 61, 62, 65,
41, 46, 47, 48, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 60, 61,
62, 41, 47, 48, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 60, 61,
62, 41, 48, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 60, 61, 62,
41, 60, 61, 41, 61, 41, 46, 47,
48, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 56, 57, 58, 41, 41,
41, 41, 41, 60, 61, 62, 65, 41,
46, 47, 48, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 57, 58,
41, 41, 41, 41, 41, 60, 61, 62,
65, 41, 46, 47, 48, 41, 41, 41,
41, 41, 41, 41, 41, 41, 41, 41,
41, 58, 41, 41, 41, 41, 41, 60,
61, 62, 65, 41, 67, 66, 46, 47,
48, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 41, 41, 41, 41, 41,
41, 41, 41, 60, 61, 62, 65, 41,
45, 46, 47, 48, 41, 41, 41, 41,
41, 41, 53, 54, 55, 41, 56, 57,
58, 41, 41, 41, 41, 45, 60, 61,
62, 65, 41, 45, 46, 47, 48, 41,
41, 41, 41, 41, 41, 41, 54, 55,
41, 56, 57, 58, 41, 41, 41, 41,
45, 60, 61, 62, 65, 41, 45, 46,
47, 48, 41, 41, 41, 41, 41, 41,
41, 41, 55, 41, 56, 57, 58, 41,
41, 41, 41, 45, 60, 61, 62, 65,
41, 44, 45, 46, 47, 48, 41, 50,
44, 41, 41, 41, 53, 54, 55, 41,
56, 57, 58, 41, 41, 41, 41, 45,
60, 61, 62, 65, 41, 44, 45, 46,
47, 48, 41, 68, 44, 41, 41, 41,
53, 54, 55, 41, 56, 57, 58, 41,
41, 41, 41, 45, 60, 61, 62, 65,
41, 44, 45, 46, 47, 48, 41, 41,
44, 41, 41, 41, 53, 54, 55, 41,
56, 57, 58, 41, 41, 41, 41, 45,
60, 61, 62, 65, 41, 44, 45, 46,
47, 48, 49, 50, 44, 41, 41, 41,
53, 54, 55, 41, 56, 57, 58, 41,
41, 41, 41, 45, 60, 61, 62, 65,
41, 42, 1, 41, 41, 43, 41, 41,
41, 41, 41, 41, 44, 45, 46, 47,
48, 49, 50, 44, 51, 41, 52, 53,
54, 55, 41, 56, 57, 58, 41, 41,
41, 41, 59, 60, 61, 62, 1, 41,
42, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 64, 63,
63, 63, 63, 63, 63, 63, 45, 46,
47, 48, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 56, 57, 58, 63,
63, 63, 63, 63, 60, 61, 62, 65,
63, 70, 69, 11, 71, 42, 1, 41,
41, 43, 41, 41, 41, 41, 41, 41,
44, 45, 46, 47, 48, 49, 50, 44,
51, 9, 52, 53, 54, 55, 41, 56,
57, 58, 41, 17, 72, 41, 59, 60,
61, 62, 1, 41, 17, 72, 73, 72,
73, 3, 6, 74, 74, 75, 74, 74,
74, 74, 74, 74, 18, 19, 20, 21,
22, 23, 24, 18, 25, 27, 27, 28,
29, 30, 81, 31, 32, 33, 81, 81,
81, 81, 37, 38, 39, 40, 6, 81,
18, 19, 20, 21, 22, 81, 81, 81,
81, 81, 81, 28, 29, 30, 81, 31,
32, 33, 81, 81, 81, 81, 19, 38,
39, 40, 84, 81, 19, 20, 21, 22,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 31, 32, 33, 81, 81, 81,
81, 81, 38, 39, 40, 84, 81, 20,
21, 22, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 38, 39, 40, 81,
21, 22, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 38, 39, 40, 81,
22, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 38, 39, 40, 81, 38,
39, 81, 39, 81, 20, 21, 22, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 31, 32, 33, 81, 81, 81, 81,
81, 38, 39, 40, 84, 81, 20, 21,
22, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 32, 33, 81, 81,
81, 81, 81, 38, 39, 40, 84, 81,
20, 21, 22, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 33,
81, 81, 81, 81, 81, 38, 39, 40,
84, 81, 20, 21, 22, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 38,
39, 40, 84, 81, 19, 20, 21, 22,
81, 81, 81, 81, 81, 81, 28, 29,
30, 81, 31, 32, 33, 81, 81, 81,
81, 19, 38, 39, 40, 84, 81, 19,
20, 21, 22, 81, 81, 81, 81, 81,
81, 81, 29, 30, 81, 31, 32, 33,
81, 81, 81, 81, 19, 38, 39, 40,
84, 81, 19, 20, 21, 22, 81, 81,
81, 81, 81, 81, 81, 81, 30, 81,
31, 32, 33, 81, 81, 81, 81, 19,
38, 39, 40, 84, 81, 18, 19, 20,
21, 22, 81, 24, 18, 81, 81, 81,
28, 29, 30, 81, 31, 32, 33, 81,
81, 81, 81, 19, 38, 39, 40, 84,
81, 18, 19, 20, 21, 22, 81, 85,
18, 81, 81, 81, 28, 29, 30, 81,
31, 32, 33, 81, 81, 81, 81, 19,
38, 39, 40, 84, 81, 18, 19, 20,
21, 22, 81, 81, 18, 81, 81, 81,
28, 29, 30, 81, 31, 32, 33, 81,
81, 81, 81, 19, 38, 39, 40, 84,
81, 18, 19, 20, 21, 22, 23, 24,
18, 81, 81, 81, 28, 29, 30, 81,
31, 32, 33, 81, 81, 81, 81, 19,
38, 39, 40, 84, 81, 3, 6, 81,
81, 82, 81, 81, 81, 81, 81, 81,
29, 30, 74, 31, 32, 33, 74, 74,
74, 74, 37, 38, 39, 40, 6, 74,
18, 19, 20, 21, 22, 74, 74, 74,
74, 74, 74, 28, 29, 30, 74, 31,
32, 33, 74, 74, 74, 74, 19, 38,
39, 40, 76, 74, 19, 20, 21, 22,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 31, 32, 33, 74, 74, 74,
74, 74, 38, 39, 40, 76, 74, 20,
21, 22, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 38, 39, 40, 74,
21, 22, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 38, 39, 40, 74,
22, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 38, 39, 40, 74, 38,
39, 74, 39, 74, 20, 21, 22, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 31, 32, 33, 74, 74, 74, 74,
74, 38, 39, 40, 76, 74, 20, 21,
22, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 32, 33, 74, 74,
74, 74, 74, 38, 39, 40, 76, 74,
20, 21, 22, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 33,
74, 74, 74, 74, 74, 38, 39, 40,
76, 74, 20, 21, 22, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 38,
39, 40, 76, 74, 19, 20, 21, 22,
74, 74, 74, 74, 74, 74, 28, 29,
30, 74, 31, 32, 33, 74, 74, 74,
74, 19, 38, 39, 40, 76, 74, 19,
20, 21, 22, 74, 74, 74, 74, 74,
74, 74, 29, 30, 74, 31, 32, 33,
74, 74, 74, 74, 19, 38, 39, 40,
76, 74, 19, 20, 21, 22, 74, 74,
74, 74, 74, 74, 74, 74, 30, 74,
31, 32, 33, 74, 74, 74, 74, 19,
38, 39, 40, 76, 74, 18, 19, 20,
21, 22, 74, 24, 18, 74, 74, 74,
28, 29, 30, 74, 31, 32, 33, 74,
74, 74, 74, 19, 38, 39, 40, 76,
74, 18, 19, 20, 21, 22, 74, 77,
18, 74, 74, 74, 28, 29, 30, 74,
31, 32, 33, 74, 74, 74, 74, 19,
38, 39, 40, 76, 74, 18, 19, 20,
21, 22, 74, 74, 18, 74, 74, 74,
28, 29, 30, 74, 31, 32, 33, 74,
74, 74, 74, 19, 38, 39, 40, 76,
74, 18, 19, 20, 21, 22, 23, 24,
18, 74, 74, 74, 28, 29, 30, 74,
31, 32, 33, 74, 74, 74, 74, 19,
38, 39, 40, 76, 74, 3, 6, 74,
74, 75, 74, 74, 74, 74, 74, 74,
18, 19, 20, 21, 22, 23, 24, 18,
25, 81, 27, 28, 29, 30, 81, 31,
32, 33, 81, 81, 81, 81, 37, 38,
39, 40, 6, 81, 3, 81, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
81, 81, 4, 81, 81, 81, 81, 81,
81, 81, 19, 20, 21, 22, 81, 81,
81, 81, 81, 81, 81, 81, 81, 81,
31, 32, 33, 81, 81, 81, 81, 81,
38, 39, 40, 84, 81, 3, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 4, 86, 87, 81, 14,
81, 81, 81, 81, 81, 81, 81, 88,
81, 14, 81, 6, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 6, 86, 86, 86, 6,
86, 9, 81, 81, 81, 9, 81, 81,
81, 81, 81, 3, 6, 14, 81, 82,
81, 81, 81, 81, 81, 81, 18, 19,
20, 21, 22, 23, 24, 18, 25, 26,
27, 28, 29, 30, 81, 31, 32, 33,
81, 34, 35, 81, 37, 38, 39, 40,
6, 81, 3, 6, 81, 81, 82, 81,
81, 81, 81, 81, 81, 18, 19, 20,
21, 22, 23, 24, 18, 25, 26, 27,
28, 29, 30, 81, 31, 32, 33, 81,
81, 81, 81, 37, 38, 39, 40, 6,
81, 34, 35, 81, 35, 81, 78, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 80, 80, 80, 80, 80, 80,
80, 80, 78, 79, 80, 9, 86, 86,
86, 9, 86, 0
25, 74, 27, 28, 29, 30, 74, 31,
32, 33, 74, 74, 74, 74, 37, 38,
39, 40, 6, 74, 3, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 4, 74, 74, 74, 74, 74,
74, 74, 19, 20, 21, 22, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74,
31, 32, 33, 74, 74, 74, 74, 74,
38, 39, 40, 76, 74, 3, 78, 78,
78, 78, 78, 78, 78, 78, 78, 78,
78, 78, 78, 4, 78, 79, 74, 14,
74, 6, 78, 78, 78, 78, 78, 78,
78, 78, 78, 78, 78, 78, 78, 78,
78, 78, 78, 78, 78, 78, 78, 78,
78, 78, 78, 78, 78, 78, 78, 78,
78, 6, 78, 78, 78, 6, 78, 9,
74, 74, 74, 9, 74, 74, 74, 74,
74, 3, 6, 14, 74, 75, 74, 74,
74, 74, 74, 74, 18, 19, 20, 21,
22, 23, 24, 18, 25, 26, 27, 28,
29, 30, 74, 31, 32, 33, 74, 34,
35, 74, 37, 38, 39, 40, 6, 74,
3, 6, 74, 74, 75, 74, 74, 74,
74, 74, 74, 18, 19, 20, 21, 22,
23, 24, 18, 25, 26, 27, 28, 29,
30, 74, 31, 32, 33, 74, 74, 74,
74, 37, 38, 39, 40, 6, 74, 34,
35, 74, 35, 74, 9, 78, 78, 78,
9, 78, 0
};
static const char _use_syllable_machine_trans_targs[] = {
5, 9, 5, 41, 2, 5, 1, 53,
6, 7, 5, 34, 37, 63, 64, 67,
68, 72, 43, 44, 45, 46, 47, 57,
58, 60, 69, 61, 54, 55, 56, 50,
51, 52, 70, 71, 73, 62, 48, 49,
5, 5, 5, 5, 8, 0, 33, 12,
13, 14, 15, 16, 27, 28, 30, 31,
24, 25, 26, 19, 20, 21, 32, 17,
18, 5, 11, 5, 10, 22, 5, 23,
29, 5, 35, 36, 5, 38, 39, 40,
5, 5, 3, 42, 4, 59, 5, 65,
66
5, 8, 5, 36, 2, 5, 1, 47,
5, 6, 5, 31, 33, 57, 58, 60,
61, 34, 37, 38, 39, 40, 41, 51,
52, 54, 62, 55, 48, 49, 50, 44,
45, 46, 63, 64, 65, 56, 42, 43,
5, 5, 7, 0, 10, 11, 12, 13,
14, 25, 26, 28, 29, 22, 23, 24,
17, 18, 19, 30, 15, 16, 5, 5,
9, 20, 5, 21, 27, 5, 32, 5,
35, 5, 5, 3, 4, 53, 5, 59
};
static const char _use_syllable_machine_trans_actions[] = {
1, 0, 2, 3, 0, 4, 0, 5,
0, 5, 8, 0, 5, 9, 0, 9,
8, 5, 9, 0, 5, 10, 0, 10,
3, 0, 5, 5, 0, 0, 0, 5,
5, 5, 3, 3, 5, 5, 5, 5,
5, 5, 0, 0, 0, 3, 0, 0,
10, 11, 12, 13, 5, 0, 5, 0,
0, 0, 0, 0, 0, 0, 0, 5,
0, 0, 0, 0, 0, 0, 0, 0,
0, 14, 5, 15, 0, 0, 16, 0,
0, 17, 0, 0, 18, 5, 0, 0,
19, 20, 0, 3, 0, 5, 21, 0,
0
11, 12, 5, 0, 0, 0, 0, 0,
0, 0, 0, 0, 5, 0, 0, 0,
0, 0, 0, 0, 0, 0, 13, 14,
0, 0, 15, 0, 0, 16, 0, 17,
0, 18, 19, 0, 0, 5, 20, 0
};
static const char _use_syllable_machine_to_state_actions[] = {
@ -332,7 +301,6 @@ static const char _use_syllable_machine_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0
};
@ -345,21 +313,19 @@ static const char _use_syllable_machine_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0
};
static const short _use_syllable_machine_eof_trans[] = {
1, 3, 3, 6, 6, 0, 42, 44,
44, 68, 68, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 71, 44,
44, 44, 44, 44, 44, 44, 44, 44,
68, 44, 74, 77, 74, 44, 44, 81,
81, 82, 82, 82, 82, 82, 82, 82,
82, 82, 82, 82, 82, 82, 82, 82,
82, 82, 82, 82, 82, 82, 82, 87,
82, 82, 82, 87, 82, 82, 82, 82,
81, 87
1, 3, 3, 6, 6, 0, 42, 42,
64, 64, 42, 42, 42, 42, 42, 42,
42, 42, 42, 42, 67, 42, 42, 42,
42, 42, 42, 42, 42, 42, 64, 70,
72, 42, 74, 74, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 75, 75, 75, 75, 75, 75, 75,
75, 79, 75, 75, 79, 75, 75, 75,
75, 79
};
static const int use_syllable_machine_start = 5;
@ -369,30 +335,53 @@ static const int use_syllable_machine_error = -1;
static const int use_syllable_machine_en_main = 5;
#line 38 "hb-ot-shape-complex-use-machine.rl"
#line 39 "hb-ot-shape-complex-use-machine.rl"
#line 162 "hb-ot-shape-complex-use-machine.rl"
#line 161 "hb-ot-shape-complex-use-machine.rl"
#define found_syllable(syllable_type) \
HB_STMT_START { \
if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \
for (unsigned int i = ts; i < te; i++) \
if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \
syllable_serial++; \
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
/* Returns false only for a glyph whose USE category is USE_O or USE_Rsv and
 * which _hb_glyph_info_is_default_ignorable() reports as default-ignorable;
 * returns true for every other glyph. */
static bool
not_standard_default_ignorable (const hb_glyph_info_t &i)
{
  /* Any category other than O / Rsv is kept without asking the
   * default-ignorable predicate. */
  if (i.use_category() != USE_O && i.use_category() != USE_Rsv)
    return true;
  return !_hb_glyph_info_is_default_ignorable (&i);
}
static void
find_syllables_use (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act;
int cs;
hb_glyph_info_t *info = buffer->info;
auto p =
+ hb_iter (info, buffer->len)
| hb_enumerate
| hb_filter (not_standard_default_ignorable, hb_second)
| hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
{
if (p.second.use_category() == USE_ZWNJ)
for (unsigned i = p.first + 1; i < buffer->len; ++i)
if (not_standard_default_ignorable (info[i]))
return !_hb_glyph_info_is_unicode_mark (&info[i]);
return true;
})
| hb_enumerate
| machine_index
;
auto pe = p + p.len ();
auto eof = +pe;
auto ts = +p;
auto te = +p;
unsigned int act;
int cs;
#line 396 "hb-ot-shape-complex-use-machine.hh"
#line 385 "hb-ot-shape-complex-use-machine.hh"
{
cs = use_syllable_machine_start;
ts = 0;
@ -400,15 +389,12 @@ find_syllables_use (hb_buffer_t *buffer)
act = 0;
}
#line 182 "hb-ot-shape-complex-use-machine.rl"
#line 204 "hb-ot-shape-complex-use-machine.rl"
p = 0;
pe = eof = buffer->len;
unsigned int syllable_serial = 1;
#line 412 "hb-ot-shape-complex-use-machine.hh"
#line 398 "hb-ot-shape-complex-use-machine.hh"
{
int _slen;
int _trans;
@ -422,16 +408,16 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
#line 426 "hb-ot-shape-complex-use-machine.hh"
#line 412 "hb-ot-shape-complex-use-machine.hh"
}
_keys = _use_syllable_machine_trans_keys + (cs<<1);
_inds = _use_syllable_machine_indicies + _use_syllable_machine_index_offsets[cs];
_slen = _use_syllable_machine_key_spans[cs];
_trans = _inds[ _slen > 0 && _keys[0] <=( info[p].use_category()) &&
( info[p].use_category()) <= _keys[1] ?
( info[p].use_category()) - _keys[0] : _slen ];
_trans = _inds[ _slen > 0 && _keys[0] <=( (*p).second.second.use_category()) &&
( (*p).second.second.use_category()) <= _keys[1] ?
( (*p).second.second.use_category()) - _keys[0] : _slen ];
_eof_trans:
cs = _use_syllable_machine_trans_targs[_trans];
@ -444,64 +430,60 @@ _eof_trans:
#line 1 "NONE"
{te = p+1;}
break;
case 12:
#line 150 "hb-ot-shape-complex-use-machine.rl"
case 8:
#line 149 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (independent_cluster); }}
break;
case 14:
#line 153 "hb-ot-shape-complex-use-machine.rl"
case 13:
#line 152 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (standard_cluster); }}
break;
case 10:
#line 157 "hb-ot-shape-complex-use-machine.rl"
case 11:
#line 156 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (broken_cluster); }}
break;
case 8:
#line 158 "hb-ot-shape-complex-use-machine.rl"
case 9:
#line 157 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (non_cluster); }}
break;
case 11:
case 14:
#line 150 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (independent_cluster); }}
{te = p;p--;{ found_syllable (virama_terminated_cluster); }}
break;
case 15:
#line 151 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (virama_terminated_cluster); }}
break;
case 16:
#line 152 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (sakot_terminated_cluster); }}
break;
case 13:
#line 153 "hb-ot-shape-complex-use-machine.rl"
case 12:
#line 152 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (standard_cluster); }}
break;
case 18:
#line 154 "hb-ot-shape-complex-use-machine.rl"
case 17:
#line 153 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }}
break;
case 17:
#line 155 "hb-ot-shape-complex-use-machine.rl"
case 16:
#line 154 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (numeral_cluster); }}
break;
case 18:
#line 155 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (symbol_cluster); }}
break;
case 19:
#line 156 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (symbol_cluster); }}
{te = p;p--;{ found_syllable (broken_cluster); }}
break;
case 20:
#line 157 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }}
break;
case 21:
#line 158 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (non_cluster); }}
break;
case 1:
#line 153 "hb-ot-shape-complex-use-machine.rl"
#line 152 "hb-ot-shape-complex-use-machine.rl"
{{p = ((te))-1;}{ found_syllable (standard_cluster); }}
break;
case 4:
#line 157 "hb-ot-shape-complex-use-machine.rl"
#line 156 "hb-ot-shape-complex-use-machine.rl"
{{p = ((te))-1;}{ found_syllable (broken_cluster); }}
break;
case 2:
@ -519,16 +501,16 @@ _eof_trans:
case 3:
#line 1 "NONE"
{te = p+1;}
#line 157 "hb-ot-shape-complex-use-machine.rl"
#line 156 "hb-ot-shape-complex-use-machine.rl"
{act = 8;}
break;
case 9:
case 10:
#line 1 "NONE"
{te = p+1;}
#line 158 "hb-ot-shape-complex-use-machine.rl"
#line 157 "hb-ot-shape-complex-use-machine.rl"
{act = 9;}
break;
#line 532 "hb-ot-shape-complex-use-machine.hh"
#line 514 "hb-ot-shape-complex-use-machine.hh"
}
_again:
@ -537,7 +519,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
#line 541 "hb-ot-shape-complex-use-machine.hh"
#line 523 "hb-ot-shape-complex-use-machine.hh"
}
if ( ++p != pe )
@ -553,7 +535,7 @@ _again:
}
#line 190 "hb-ot-shape-complex-use-machine.rl"
#line 209 "hb-ot-shape-complex-use-machine.rl"
}

View File

@ -30,6 +30,7 @@
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#include "hb.hh"
#include "hb-ot-shape-complex-machine-index.hh"
%%{
machine use_syllable_machine;
@ -47,7 +48,6 @@ B = 1; # BASE
IND = 3; # BASE_IND
N = 4; # BASE_NUM
GB = 5; # BASE_OTHER
CGJ = 6; # CGJ
#F = 7; # CONS_FINAL
#FM = 8; # CONS_FINAL_MOD
#M = 9; # CONS_MED
@ -63,7 +63,6 @@ Rsv = 17; # Reserved characters
R = 18; # REPHA
S = 19; # SYM
#SM = 20; # SYM_MOD
VS = 21; # VARIATION_SELECTOR
#V = 36; # VOWEL
#VM = 40; # VOWEL_MOD
CS = 43; # CONS_WITH_STACKER
@ -96,7 +95,7 @@ FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable
h = H | HVM | Sk;
# Override: Adhoc ZWJ placement. https://github.com/harfbuzz/harfbuzz/issues/542#issuecomment-353169729
consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) VS? CMAbv? CMBlw*)*;
consonant_modifiers = CMAbv* CMBlw* ((ZWJ?.h.ZWJ? B | SUB) CMAbv? CMBlw*)*;
# Override: Allow two MBlw. https://github.com/harfbuzz/harfbuzz/issues/376
medial_consonants = MPre? MAbv? MBlw?.MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
@ -104,7 +103,7 @@ vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst*;
final_modifiers = FMAbv* FMBlw* | FMPst?;
complex_syllable_start = (R | CS)? (B | GB) VS?;
complex_syllable_start = (R | CS)? (B | GB);
complex_syllable_middle =
consonant_modifiers
medial_consonants
@ -117,8 +116,8 @@ complex_syllable_tail =
final_consonants
final_modifiers
;
number_joiner_terminated_cluster_tail = (HN N VS?)* HN;
numeral_cluster_tail = (HN N VS?)+;
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail = (HN N)+;
symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;
virama_terminated_cluster =
@ -140,10 +139,10 @@ broken_cluster =
(complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail)
;
number_joiner_terminated_cluster = N VS? number_joiner_terminated_cluster_tail;
numeral_cluster = N VS? numeral_cluster_tail?;
symbol_cluster = (S | GB) VS? symbol_cluster_tail?;
independent_cluster = (IND | O | Rsv | WJ) VS?;
number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster = N numeral_cluster_tail?;
symbol_cluster = (S | GB) symbol_cluster_tail?;
independent_cluster = (IND | O | Rsv | WJ);
other = any;
main := |*
@ -163,27 +162,47 @@ main := |*
#define found_syllable(syllable_type) \
HB_STMT_START { \
if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \
for (unsigned int i = ts; i < te; i++) \
if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \
info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \
syllable_serial++; \
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
} HB_STMT_END
/* Returns false only for a glyph whose USE category is USE_O or USE_Rsv and
 * which _hb_glyph_info_is_default_ignorable() reports as default-ignorable;
 * returns true for every other glyph. */
static bool
not_standard_default_ignorable (const hb_glyph_info_t &i)
{
  /* Any category other than O / Rsv is kept without asking the
   * default-ignorable predicate. */
  if (i.use_category() != USE_O && i.use_category() != USE_Rsv)
    return true;
  return !_hb_glyph_info_is_default_ignorable (&i);
}
static void
find_syllables_use (hb_buffer_t *buffer)
{
unsigned int p, pe, eof, ts, te, act;
int cs;
hb_glyph_info_t *info = buffer->info;
auto p =
+ hb_iter (info, buffer->len)
| hb_enumerate
| hb_filter (not_standard_default_ignorable, hb_second)
| hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
{
if (p.second.use_category() == USE_ZWNJ)
for (unsigned i = p.first + 1; i < buffer->len; ++i)
if (not_standard_default_ignorable (info[i]))
return !_hb_glyph_info_is_unicode_mark (&info[i]);
return true;
})
| hb_enumerate
| machine_index
;
auto pe = p + p.len ();
auto eof = +pe;
auto ts = +p;
auto te = +p;
unsigned int act;
int cs;
%%{
write init;
getkey info[p].use_category();
getkey (*p).second.second.use_category();
}%%
p = 0;
pe = eof = buffer->len;
unsigned int syllable_serial = 1;
%%{
write exec;

View File

@ -24,7 +24,6 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-macros"
#define B USE_B /* BASE */
#define CGJ USE_CGJ /* CGJ */
#define CS USE_CS /* CONS_WITH_STACKER */
#define GB USE_GB /* BASE_OTHER */
#define H USE_H /* HALANT */
@ -38,7 +37,6 @@
#define S USE_S /* SYM */
#define SUB USE_SUB /* CONS_SUB */
#define Sk USE_Sk /* SAKOT */
#define VS USE_VS /* VARIATION_SELECTOR */
#define WJ USE_WJ /* Word_Joiner */
#define ZWJ USE_ZWJ /* ZWJ */
#define ZWNJ USE_ZWNJ /* ZWNJ */
@ -86,13 +84,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
/* 00D0 */ O, O, O, O, O, O, O, GB,
#define use_offset_0x0348u 80
/* Combining Diacritical Marks */
O, O, O, O, O, O, O, CGJ,
#define use_offset_0x0900u 88
#define use_offset_0x0900u 80
/* Devanagari */
@ -205,7 +197,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B,
/* 0DF0 */ O, O, VPst, VPst, O, O, O, O,
#define use_offset_0x0f18u 1360
#define use_offset_0x0f18u 1352
/* Tibetan */
@ -222,7 +214,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O,
/* 0FC0 */ O, O, O, O, O, O, FMBlw, O,
#define use_offset_0x1000u 1536
#define use_offset_0x1000u 1528
/* Myanmar */
@ -238,7 +230,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst,
/* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O,
#define use_offset_0x1700u 1696
#define use_offset_0x1700u 1688
/* Tagalog */
@ -271,7 +263,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 17D0 */ FMAbv, VAbv, H, FMAbv, O, O, O, O, O, O, O, O, B, FMAbv, O, O,
/* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1900u 1936
#define use_offset_0x1900u 1928
/* Limbu */
@ -315,7 +307,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
/* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x1b00u 2352
#define use_offset_0x1b00u 2344
/* Balinese */
@ -351,7 +343,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FMAbv, CMBlw, O, O, O, O, O, O, O, O,
/* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B,
#define use_offset_0x1cd0u 2688
#define use_offset_0x1cd0u 2680
/* Vedic Extensions */
@ -360,20 +352,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O,
/* 1CF0 */ O, O, IND, IND, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, GB, O, O, O, O, O,
#define use_offset_0x1df8u 2736
#define use_offset_0x1df8u 2728
/* Combining Diacritical Marks Supplement */
O, O, O, FMAbv, O, O, O, O,
#define use_offset_0x2008u 2744
#define use_offset_0x2008u 2736
/* General Punctuation */
O, O, O, O, ZWNJ, ZWJ, O, O,
/* 2010 */ GB, GB, GB, GB, GB, O, O, O,
#define use_offset_0x2060u 2760
#define use_offset_0x2060u 2752
/* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O,
@ -382,20 +374,20 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 2070 */ O, O, O, O, FMPst, O, O, O, O, O, O, O, O, O, O, O,
/* 2080 */ O, O, FMPst, FMPst, FMPst, O, O, O,
#define use_offset_0x20f0u 2800
#define use_offset_0x20f0u 2792
/* Combining Diacritical Marks for Symbols */
/* 20F0 */ VMAbv, O, O, O, O, O, O, O,
#define use_offset_0x25c8u 2808
#define use_offset_0x25c8u 2800
/* Geometric Shapes */
O, O, O, O, GB, O, O, O,
#define use_offset_0xa800u 2816
#define use_offset_0xa800u 2808
/* Syloti Nagri */
@ -482,7 +474,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst,
/* AAF0 */ O, O, O, O, O, VMPst, H, O,
#define use_offset_0xabc0u 3576
#define use_offset_0xabc0u 3568
/* Meetei Mayek */
@ -492,14 +484,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O,
/* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0xfe00u 3640
/* Variation Selectors */
/* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS,
#define use_offset_0x10a00u 3656
#define use_offset_0x10a00u 3632
/* Kharoshthi */
@ -510,7 +495,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H,
/* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O,
#define use_offset_0x11000u 3736
#define use_offset_0x11000u 3712
/* Brahmi */
@ -531,7 +516,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11100u 3928
#define use_offset_0x11100u 3904
/* Chakma */
@ -569,7 +554,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw,
/* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O,
#define use_offset_0x11280u 4248
#define use_offset_0x11280u 4224
/* Multani */
@ -597,7 +582,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
/* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O,
#define use_offset_0x11400u 4496
#define use_offset_0x11400u 4472
/* Newa */
@ -620,7 +605,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O,
/* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11580u 4720
#define use_offset_0x11580u 4696
/* Siddham */
@ -663,7 +648,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O,
/* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O,
#define use_offset_0x11800u 5168
#define use_offset_0x11800u 5144
/* Dogra */
@ -673,7 +658,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw,
/* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O,
#define use_offset_0x11900u 5232
#define use_offset_0x11900u 5208
/* Dives Akuru */
@ -685,7 +670,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11940 */ MPst, R, MBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O,
/* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x119a0u 5328
#define use_offset_0x119a0u 5304
/* Nandinagari */
@ -713,7 +698,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw,
/* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O,
#define use_offset_0x11c00u 5584
#define use_offset_0x11c00u 5560
/* Bhaiksuki */
@ -734,7 +719,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB,
/* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O,
#define use_offset_0x11d00u 5768
#define use_offset_0x11d00u 5744
/* Masaram Gondi */
@ -754,7 +739,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O,
/* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O,
#define use_offset_0x11ee0u 5944
#define use_offset_0x11ee0u 5920
/* Makasar */
@ -762,7 +747,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = {
/* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B,
/* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O,
}; /* Table items: 5968; occupancy: 74% */
}; /* Table items: 5944; occupancy: 74% */
USE_TABLE_ELEMENT_TYPE
hb_use_get_category (hb_codepoint_t u)
@ -772,7 +757,6 @@ hb_use_get_category (hb_codepoint_t u)
case 0x0u:
if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return use_table[u - 0x0028u + use_offset_0x0028u];
if (hb_in_range<hb_codepoint_t> (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u];
if (hb_in_range<hb_codepoint_t> (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u];
if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u];
if (hb_in_range<hb_codepoint_t> (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u];
break;
@ -798,10 +782,6 @@ hb_use_get_category (hb_codepoint_t u)
if (hb_in_range<hb_codepoint_t> (u, 0xABC0u, 0xABFFu)) return use_table[u - 0xABC0u + use_offset_0xabc0u];
break;
case 0xFu:
if (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu)) return use_table[u - 0xFE00u + use_offset_0xfe00u];
break;
case 0x10u:
if (hb_in_range<hb_codepoint_t> (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u];
break;
@ -827,7 +807,6 @@ hb_use_get_category (hb_codepoint_t u)
}
#undef B
#undef CGJ
#undef CS
#undef GB
#undef H
@ -841,7 +820,6 @@ hb_use_get_category (hb_codepoint_t u)
#undef S
#undef SUB
#undef Sk
#undef VS
#undef WJ
#undef ZWJ
#undef ZWNJ

View File

@ -49,7 +49,6 @@ enum use_category_t {
USE_IND = 3, /* BASE_IND */
USE_N = 4, /* BASE_NUM */
USE_GB = 5, /* BASE_OTHER */
USE_CGJ = 6, /* CGJ */
// USE_F = 7, /* CONS_FINAL */
USE_FM = 8, /* CONS_FINAL_MOD */
// USE_M = 9, /* CONS_MED */
@ -65,7 +64,6 @@ enum use_category_t {
USE_R = 18, /* REPHA */
USE_S = 19, /* SYM */
// USE_SM = 20, /* SYM_MOD */
USE_VS = 21, /* VARIATION_SELECTOR */
// USE_V = 36, /* VOWEL */
// USE_VM = 40, /* VOWEL_MOD */
USE_CS = 43, /* CONS_WITH_STACKER */