/*
 * Copyright © 2015 Mozilla Foundation.
 * Copyright © 2015 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Mozilla Author(s): Jonathan Kew
 * Google Author(s): Behdad Esfahbod
 */
|
|
|
|
|
|
|
|
#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
|
|
|
|
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
|
|
|
|
|
2018-08-26 07:36:36 +02:00
|
|
|
#include "hb.hh"
|
2015-07-20 14:30:51 +02:00
|
|
|
|
|
|
|
%%{
  # Declare the state machine; the later "write init" / "write exec"
  # statements in this file belong to this machine.
  machine use_syllable_machine;

  # Input symbols (the category keys fed through getkey) are unsigned chars.
  alphtype unsigned char;

  # Emit the machine's static data at this point of the generated file.
  write data;
}%%
|
|
|
|
|
|
|
|
%%{

# ---------------------------------------------------------------------------
# Category keys.
#
# The original note says these follow enum use_category_t (declared outside
# this file) and that the duplication could not be avoided, so the two lists
# have to be kept in sync by hand.
# ---------------------------------------------------------------------------

O     = 0;   # OTHER

B     = 1;   # BASE
N     = 4;   # BASE_NUM
GB    = 5;   # BASE_OTHER
SUB   = 11;  # CONS_SUB
H     = 12;  # HALANT

HN    = 13;  # HALANT_NUM
ZWNJ  = 14;  # Zero width non-joiner (not referenced by any rule below)
R     = 18;  # REPHA
S     = 19;  # SYM
CS    = 43;  # CONS_WITH_STACKER
HVM   = 44;  # HALANT_OR_VOWEL_MODIFIER
Sk    = 48;  # SAKOT
G     = 49;  # HIEROGLYPH
J     = 50;  # HIEROGLYPH_JOINER
SB    = 51;  # HIEROGLYPH_SEGMENT_BEGIN
SE    = 52;  # HIEROGLYPH_SEGMENT_END

FAbv  = 24;  # CONS_FINAL_ABOVE
FBlw  = 25;  # CONS_FINAL_BELOW
FPst  = 26;  # CONS_FINAL_POST
MAbv  = 27;  # CONS_MED_ABOVE
MBlw  = 28;  # CONS_MED_BELOW
MPst  = 29;  # CONS_MED_POST
MPre  = 30;  # CONS_MED_PRE
CMAbv = 31;  # CONS_MOD_ABOVE
CMBlw = 32;  # CONS_MOD_BELOW
VAbv  = 33;  # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw  = 34;  # VOWEL_BELOW / VOWEL_BELOW_POST
VPst  = 35;  # VOWEL_POST UIPC = Right
VPre  = 22;  # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv = 37;  # VOWEL_MOD_ABOVE
VMBlw = 38;  # VOWEL_MOD_BELOW
VMPst = 39;  # VOWEL_MOD_POST
VMPre = 23;  # VOWEL_MOD_PRE
SMAbv = 41;  # SYM_MOD_ABOVE
SMBlw = 42;  # SYM_MOD_BELOW
FMAbv = 45;  # CONS_FINAL_MOD UIPC = Top
FMBlw = 46;  # CONS_FINAL_MOD UIPC = Bottom
FMPst = 47;  # CONS_FINAL_MOD UIPC = Not_Applicable

# ---------------------------------------------------------------------------
# Building blocks.
# ---------------------------------------------------------------------------

# Any of the three halant-like categories.
h = H | HVM | Sk;

# Consonant modifiers, optionally followed by any number of subjoined
# consonants (either "h B" or SUB), each with its own modifiers.
consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*;

# At most one medial consonant per position, in pre/above/below/post order.
medial_consonants = MPre? MAbv? MBlw? MPst?;

# Dependent vowels, grouped by position in pre/above/below/post order.
dependent_vowels = VPre* VAbv* VBlw* VPst*;

# Optional HVM followed by vowel modifiers grouped by position.
vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*;

# Final consonants grouped by position.
final_consonants = FAbv* FBlw* FPst*;

# Either above/below final modifiers, or at most a single FMPst.
final_modifiers = FMAbv* FMBlw* | FMPst?;

# A syllable opens with an optional repha or stacker-consonant, then a base.
complex_syllable_start = (R | CS)? (B | GB);

# Everything between the base and the final consonants.
complex_syllable_middle =
  consonant_modifiers
  medial_consonants
  dependent_vowels
  vowel_modifiers
  (Sk B)*
;

# Middle part plus the final consonants and final modifiers.
complex_syllable_tail =
  complex_syllable_middle
  final_consonants
  final_modifiers
;

# Tails of the number and symbol clusters.
number_joiner_terminated_cluster_tail = (HN N)* HN;
numeral_cluster_tail = (HN N)+;
symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;

# ---------------------------------------------------------------------------
# Cluster types recognised by the scanner.
# ---------------------------------------------------------------------------

# Base plus consonant modifiers, ending in a halant-like category.
virama_terminated_cluster =
  complex_syllable_start
  consonant_modifiers
  h
;

# Base plus a full middle part, ending in a sakot.
sakot_terminated_cluster =
  complex_syllable_start
  complex_syllable_middle
  Sk
;

# The regular, complete syllable.
standard_cluster =
  complex_syllable_start
  complex_syllable_tail
;

# A tail without its leading base (optionally preceded by a repha).
broken_cluster =
  R?
  (complex_syllable_tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail | symbol_cluster_tail)
;

number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail;
numeral_cluster = N numeral_cluster_tail?;
symbol_cluster = (S | GB) symbol_cluster_tail?;
hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*;
independent_cluster = O;

# Catch-all: any single symbol not claimed by the rules above.
other = any;

# ---------------------------------------------------------------------------
# Scanner.  Each alternative reports its cluster type through the
# found_syllable() macro defined after this block.  The order of the
# alternatives is significant for a Ragel scanner and must not be changed.
# ---------------------------------------------------------------------------

main := |*
  independent_cluster              => { found_syllable (independent_cluster); };
  virama_terminated_cluster        => { found_syllable (virama_terminated_cluster); };
  sakot_terminated_cluster         => { found_syllable (sakot_terminated_cluster); };
  standard_cluster                 => { found_syllable (standard_cluster); };
  number_joiner_terminated_cluster => { found_syllable (number_joiner_terminated_cluster); };
  numeral_cluster                  => { found_syllable (numeral_cluster); };
  symbol_cluster                   => { found_syllable (symbol_cluster); };
  hieroglyph_cluster               => { found_syllable (hieroglyph_cluster); };
  broken_cluster                   => { found_syllable (broken_cluster); };
  other                            => { found_syllable (non_cluster); };
*|;


}%%
|
|
|
|
|
|
|
|
/*
 * found_syllable(syllable_type)
 *
 * Scanner action body.  Expects `ts`, `te`, `info` and `syllable_serial`
 * to be in scope at the expansion site.
 *
 * Every buffer slot from the original index recorded at `ts` up to (but not
 * including) the one recorded at `te` gets its syllable() set to the current
 * serial in the bits above the low four, OR-ed with the
 * use_<syllable_type> constant.  The walk is over raw `info` indices, so
 * slots that the iterator pipeline filtered out in between are tagged too.
 *
 * The serial is then advanced; on reaching 16 it restarts at 1, so it cycles
 * through 1..15 and is never 0.
 */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    /* Debug trace, compiled out. */ \
    if (0) fprintf (stderr, "syllable %d..%d %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \
    for (unsigned glyph_idx = (*ts).second.first; glyph_idx < (*te).second.first; ++glyph_idx) \
    { \
      info[glyph_idx].syllable() = (syllable_serial << 4) | use_##syllable_type; \
    } \
    if (unlikely (++syllable_serial == 16)) \
      syllable_serial = 1; \
  } HB_STMT_END
|
|
|
|
|
2019-05-03 20:16:50 +02:00
|
|
|
static bool
|
|
|
|
not_standard_default_ignorable (const hb_glyph_info_t &i)
|
2020-10-17 17:57:08 +02:00
|
|
|
{ return !(i.use_category() == USE_O && _hb_glyph_info_is_default_ignorable (&i)); }
|
2019-05-03 20:16:50 +02:00
|
|
|
|
2015-07-20 14:30:51 +02:00
|
|
|
static void
|
2019-07-02 23:42:45 +02:00
|
|
|
find_syllables_use (hb_buffer_t *buffer)
|
2015-07-20 14:30:51 +02:00
|
|
|
{
|
|
|
|
hb_glyph_info_t *info = buffer->info;
|
2019-05-03 20:16:50 +02:00
|
|
|
auto p =
|
|
|
|
+ hb_iter (info, buffer->len)
|
|
|
|
| hb_enumerate
|
2020-09-29 03:12:46 +02:00
|
|
|
| hb_filter ([] (const hb_glyph_info_t &i) { return not_standard_default_ignorable (i); },
|
|
|
|
hb_second)
|
2019-05-03 20:16:50 +02:00
|
|
|
| hb_filter ([&] (const hb_pair_t<unsigned, const hb_glyph_info_t &> p)
|
|
|
|
{
|
|
|
|
if (p.second.use_category() == USE_ZWNJ)
|
|
|
|
for (unsigned i = p.first + 1; i < buffer->len; ++i)
|
|
|
|
if (not_standard_default_ignorable (info[i]))
|
|
|
|
return !_hb_glyph_info_is_unicode_mark (&info[i]);
|
|
|
|
return true;
|
|
|
|
})
|
|
|
|
| hb_enumerate
|
|
|
|
| machine_index
|
|
|
|
;
|
|
|
|
auto pe = p + p.len ();
|
|
|
|
auto eof = +pe;
|
|
|
|
auto ts = +p;
|
|
|
|
auto te = +p;
|
2020-11-21 15:06:27 +01:00
|
|
|
unsigned int act HB_UNUSED;
|
2019-05-03 20:16:50 +02:00
|
|
|
int cs;
|
2015-07-20 14:30:51 +02:00
|
|
|
%%{
|
|
|
|
write init;
|
2019-05-03 20:16:50 +02:00
|
|
|
getkey (*p).second.second.use_category();
|
2015-07-20 14:30:51 +02:00
|
|
|
}%%
|
|
|
|
|
|
|
|
unsigned int syllable_serial = 1;
|
|
|
|
%%{
|
|
|
|
write exec;
|
|
|
|
}%%
|
|
|
|
}
|
|
|
|
|
|
|
|
#undef found_syllable
|
|
|
|
|
|
|
|
#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
|