/* * Copyright © 2015 Mozilla Foundation. * Copyright © 2015 Google, Inc. * * This is part of HarfBuzz, a text shaping library. * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and its documentation for any purpose, provided that the * above copyright notice and the following two paragraphs appear in * all copies of this software. * * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * * Mozilla Author(s): Jonathan Kew * Google Author(s): Behdad Esfahbod */ #ifndef HB_OT_SHAPER_USE_MACHINE_HH #define HB_OT_SHAPER_USE_MACHINE_HH #include "hb.hh" #include "hb-ot-shaper-syllabic.hh" /* buffer var allocations */ #define use_category() ot_shaper_var_u8_category() #define USE(Cat) use_syllable_machine_ex_##Cat enum use_syllable_type_t { use_virama_terminated_cluster, use_sakot_terminated_cluster, use_standard_cluster, use_number_joiner_terminated_cluster, use_numeral_cluster, use_symbol_cluster, use_hieroglyph_cluster, use_broken_cluster, use_non_cluster, }; %%{ machine use_syllable_machine; alphtype unsigned char; write exports; write data; }%% %%{ # Categories used in the Universal Shaping Engine spec: # https://docs.microsoft.com/en-us/typography/script-development/use export O = 0; # OTHER export B = 1; # BASE export N = 4; # BASE_NUM export GB = 5; # BASE_OTHER export CGJ = 6; # CGJ export SUB = 11; # CONS_SUB export H = 12; # HALANT export HN = 13; # HALANT_NUM export ZWNJ = 14; # Zero width non-joiner export WJ = 16; # Word joiner export R = 18; # REPHA export CS = 43; # CONS_WITH_STACKER export IS = 44; # INVISIBLE_STACKER export Sk = 48; # SAKOT export G = 49; # HIEROGLYPH export J = 50; # HIEROGLYPH_JOINER export SB = 51; # HIEROGLYPH_SEGMENT_BEGIN export SE = 52; # HIEROGLYPH_SEGMENT_END export HVM = 53; # HALANT_OR_VOWEL_MODIFIER export FAbv = 24; # CONS_FINAL_ABOVE export FBlw = 25; # CONS_FINAL_BELOW export FPst = 26; # CONS_FINAL_POST export MAbv = 27; # CONS_MED_ABOVE export MBlw = 28; # CONS_MED_BELOW export MPst = 29; # CONS_MED_POST export MPre = 30; # CONS_MED_PRE export CMAbv = 31; # CONS_MOD_ABOVE export CMBlw = 32; # CONS_MOD_BELOW export VAbv = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST export VBlw = 34; # VOWEL_BELOW / VOWEL_BELOW_POST export VPst = 35; # VOWEL_POST UIPC = Right export VPre = 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST export VMAbv = 37; # VOWEL_MOD_ABOVE export VMBlw = 38; # VOWEL_MOD_BELOW export VMPst = 39; # VOWEL_MOD_POST export VMPre = 23; # VOWEL_MOD_PRE export SMAbv = 41; # SYM_MOD_ABOVE export SMBlw = 42; # SYM_MOD_BELOW export FMAbv = 45; # CONS_FINAL_MOD UIPC = Top export FMBlw = 46; # CONS_FINAL_MOD UIPC = Bottom export FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable h = H | HVM | IS | Sk; consonant_modifiers = CMAbv* CMBlw* ((h B | SUB) CMAbv? CMBlw*)*; medial_consonants = MPre? MAbv? MBlw? MPst?; dependent_vowels = VPre* VAbv* VBlw* VPst* | H; vowel_modifiers = HVM? VMPre* VMAbv* VMBlw* VMPst*; final_consonants = FAbv* FBlw* FPst*; final_modifiers = FMAbv* FMBlw* | FMPst?; complex_syllable_start = (R | CS)? (B | GB); complex_syllable_middle = consonant_modifiers medial_consonants dependent_vowels vowel_modifiers (Sk B)* ; complex_syllable_tail = complex_syllable_middle final_consonants final_modifiers ; number_joiner_terminated_cluster_tail = (HN N)* HN; numeral_cluster_tail = (HN N)+; symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+; virama_terminated_cluster_tail = consonant_modifiers IS ; virama_terminated_cluster = complex_syllable_start virama_terminated_cluster_tail ; sakot_terminated_cluster_tail = complex_syllable_middle Sk ; sakot_terminated_cluster = complex_syllable_start sakot_terminated_cluster_tail ; standard_cluster = complex_syllable_start complex_syllable_tail ; tail = complex_syllable_tail | sakot_terminated_cluster_tail | symbol_cluster_tail | virama_terminated_cluster_tail; broken_cluster = R? (tail | number_joiner_terminated_cluster_tail | numeral_cluster_tail) ; number_joiner_terminated_cluster = N number_joiner_terminated_cluster_tail; numeral_cluster = N numeral_cluster_tail?; symbol_cluster = (O | GB) tail?; hieroglyph_cluster = SB+ | SB* G SE* (J SE* (G SE*)?)*; other = any; main := |* virama_terminated_cluster ZWNJ? => { found_syllable (use_virama_terminated_cluster); }; sakot_terminated_cluster ZWNJ? => { found_syllable (use_sakot_terminated_cluster); }; standard_cluster ZWNJ? => { found_syllable (use_standard_cluster); }; number_joiner_terminated_cluster ZWNJ? => { found_syllable (use_number_joiner_terminated_cluster); }; numeral_cluster ZWNJ? => { found_syllable (use_numeral_cluster); }; symbol_cluster ZWNJ? => { found_syllable (use_symbol_cluster); }; hieroglyph_cluster ZWNJ? => { found_syllable (use_hieroglyph_cluster); }; broken_cluster ZWNJ? => { found_syllable (use_broken_cluster); buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_BROKEN_SYLLABLE; }; other => { found_syllable (use_non_cluster); }; *|; }%% #define found_syllable(syllable_type) \ HB_STMT_START { \ if (0) fprintf (stderr, "syllable %u..%u %s\n", (*ts).second.first, (*te).second.first, #syllable_type); \ for (unsigned i = (*ts).second.first; i < (*te).second.first; ++i) \ info[i].syllable() = (syllable_serial << 4) | syllable_type; \ syllable_serial++; \ if (syllable_serial == 16) syllable_serial = 1; \ } HB_STMT_END template struct machine_index_t : hb_iter_with_fallback_t, typename Iter::item_t> { machine_index_t (const Iter& it) : it (it) {} machine_index_t (const machine_index_t& o) : hb_iter_with_fallback_t, typename Iter::item_t> (), it (o.it), is_null (o.is_null) {} static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator; static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator; typename Iter::item_t __item__ () const { return *it; } typename Iter::item_t __item_at__ (unsigned i) const { return it[i]; } unsigned __len__ () const { return it.len (); } void __next__ () { ++it; } void __forward__ (unsigned n) { it += n; } void __prev__ () { --it; } void __rewind__ (unsigned n) { it -= n; } void operator = (unsigned n) { assert (n == 0); is_null = true; } explicit operator bool () { return !is_null; } void operator = (const machine_index_t& o) { is_null = o.is_null; unsigned index = (*it).first; unsigned n = (*o.it).first; if (index < n) it += n - index; else if (index > n) it -= index - n; } bool operator == (const machine_index_t& o) const { return is_null ? o.is_null : !o.is_null && (*it).first == (*o.it).first; } bool operator != (const machine_index_t& o) const { return !(*this == o); } private: Iter it; bool is_null = false; }; struct { template machine_index_t> operator () (Iter&& it) const { return machine_index_t> (hb_iter (it)); } } HB_FUNCOBJ (machine_index); static bool not_ccs_default_ignorable (const hb_glyph_info_t &i) { return i.use_category() != USE(CGJ); } static inline void find_syllables_use (hb_buffer_t *buffer) { hb_glyph_info_t *info = buffer->info; auto p = + hb_iter (info, buffer->len) | hb_enumerate | hb_filter ([] (const hb_glyph_info_t &i) { return not_ccs_default_ignorable (i); }, hb_second) | hb_filter ([&] (const hb_pair_t p) { if (p.second.use_category() == USE(ZWNJ)) for (unsigned i = p.first + 1; i < buffer->len; ++i) if (not_ccs_default_ignorable (info[i])) return !_hb_glyph_info_is_unicode_mark (&info[i]); return true; }) | hb_enumerate | machine_index ; auto pe = p + p.len (); auto eof = +pe; auto ts = +p; auto te = +p; unsigned int act HB_UNUSED; int cs; %%{ write init; getkey (*p).second.second.use_category(); }%% unsigned int syllable_serial = 1; %%{ write exec; }%% } #undef found_syllable #endif /* HB_OT_SHAPER_USE_MACHINE_HH */