From e0eabd7f67462ac34fbfc749d897be478fbd1224 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod
Date: Mon, 20 Jul 2015 13:30:51 +0100
Subject: [PATCH] [USE] Put a Ragel machine together

Grammar from the spec!
---
 src/hb-ot-shape-complex-use-machine.rl | 169 +++++++++++++++++++++++++
 src/hb-ot-shape-complex-use-private.hh | 160 +++++++++++++++++++++++
 2 files changed, 329 insertions(+)

diff --git a/src/hb-ot-shape-complex-use-machine.rl b/src/hb-ot-shape-complex-use-machine.rl
index e69de29bb..0ea71acf9 100644
--- a/src/hb-ot-shape-complex-use-machine.rl
+++ b/src/hb-ot-shape-complex-use-machine.rl
@@ -0,0 +1,169 @@
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
+
+#include "hb-private.hh"
+
+%%{
+  machine use_syllable_machine;
+  alphtype unsigned char;
+  write data;
+}%%
+
+%%{
+# Input symbols are the per-glyph use_category() values (see getkey in find_syllables).
+# Same order as enum use_category_t; the two lists are duplicated by hand.
+
+O     = 0;  # OTHER
+
+B     = 1;  # BASE
+IV    = 2;  # BASE_VOWEL
+IND   = 3;  # BASE_IND
+N     = 4;  # BASE_NUM
+GB    = 5;  # BASE_OTHER
+CGJ   = 6;  # CGJ
+#F    = 7;  # CONS_FINAL
+FM    = 8;  # CONS_FINAL_MOD
+#M    = 9;  # CONS_MED
+#CM   = 10; # CONS_MOD
+SUB   = 11; # CONS_SUB
+H     = 12; # HALANT
+HN    = 13; # HALANT_NUM
+ZWNJ  = 14; # Zero width non-joiner
+ZWJ   = 15; # Zero width joiner
+WJ    = 16; # Word joiner
+Rsv   = 17; # Reserved characters
+R     = 18; # REPHA
+S     = 19; # SYM
+#SM   = 20; # SYM_MOD
+VS    = 21; # VARIATION_SELECTOR
+#V    = 22; # VOWEL
+#VM   = 23; # VOWEL_MOD
+
+FAbv  = 24; # CONS_FINAL_ABOVE
+FBlw  = 25; # CONS_FINAL_BELOW
+FPst  = 26; # CONS_FINAL_POST
+MAbv  = 27; # CONS_MED_ABOVE
+MBlw  = 28; # CONS_MED_BELOW
+MPst  = 29; # CONS_MED_POST
+MPre  = 30; # CONS_MED_PRE
+CMAbv = 31; # CONS_MOD_ABOVE
+CMBlw = 32; # CONS_MOD_BELOW
+VAbv  = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
+VBlw  = 34; # VOWEL_BELOW / VOWEL_BELOW_POST
+VPst  = 35; # VOWEL_POST UIPC = Right
+VPre  = 36; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
+VMAbv = 37; # VOWEL_MOD_ABOVE
+VMBlw = 38; # VOWEL_MOD_BELOW
+VMPst = 39; # VOWEL_MOD_POST
+VMPre = 40; # VOWEL_MOD_PRE
+SMAbv = 41; # SYM_MOD_ABOVE
+SMBlw = 42; # SYM_MOD_BELOW
+
+# Building blocks shared by the cluster rules below.
+consonant_modifiers = CMAbv* CMBlw* ((H B | SUB) VS? CMAbv? CMBlw*)*;
+medial_consonants = MPre? MAbv? MBlw? MPst?;
+dependent_vowels = VPre* VAbv* VBlw* VPst*;
+vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
+final_consonants = FAbv* FBlw* FPst* FM?;
+# Cluster (syllable) types; each name is also the value passed to found_syllable() in main.
+virama_terminated_cluster =
+  R? (B | GB | IV) VS?
+  consonant_modifiers
+  H
+;
+consonant_cluster =
+  R? (B | GB) VS?
+  consonant_modifiers
+  medial_consonants
+  dependent_vowels
+  vowel_modifiers
+  final_consonants
+;
+vowel_cluster =
+  R? (IV) VS?
+  consonant_modifiers
+  medial_consonants
+  vowel_modifiers
+  final_consonants
+;
+# Digit runs are joined by HN (HALANT_NUM), not the generic H, following the USE cluster grammar.
+number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
+numeral_cluster = N VS? (HN N VS?)*;
+symbol_cluster = S VS? SMAbv* SMBlw*;
+independent_cluster = (IND | O | Rsv | WJ) VS?;
+# Longest-match scanner; each match tags its glyphs through found_syllable().
+main := |*
+  independent_cluster              => { found_syllable (independent_cluster); };
+  virama_terminated_cluster        => { found_syllable (virama_terminated_cluster); };
+  consonant_cluster                => { found_syllable (consonant_cluster); };
+  vowel_cluster                    => { found_syllable (vowel_cluster); };
+  number_joiner_terminated_cluster => { found_syllable (number_joiner_terminated_cluster); };
+  numeral_cluster                  => { found_syllable (numeral_cluster); };
+  symbol_cluster                   => { found_syllable (symbol_cluster); };
+*|;
+# NOTE(review): no rule starts with CGJ, ZWNJ, ZWJ, HN, H or a bare mark -- confirm how such input is meant to be handled.
+
+}%%
+/* Tag info[last..p] with (syllable_serial << 4) | syllable_type, then advance; the serial cycles 1..15. */
+#define found_syllable(syllable_type) \
+  HB_STMT_START { \
+    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
+    for (unsigned int i = last; i < p+1; i++) \
+      info[i].syllable() = (syllable_serial << 4) | syllable_type; \
+    last = p+1; \
+    syllable_serial++; \
+    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
+  } HB_STMT_END
+/* Run the machine over buffer->info[0..len), writing each matched glyph's syllable() field. */
+static void
+find_syllables (hb_buffer_t *buffer)
+{
+  unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
+  int cs;
+  hb_glyph_info_t *info = buffer->info;
+  %%{
+    write init;
+    getkey info[p].use_category();
+  }%%
+
+  p = 0;
+  pe = eof = buffer->len; /* the whole buffer is one input block, so end-of-block == end-of-file */
+
+  unsigned int last = 0; /* first glyph not yet assigned to a syllable */
+  unsigned int syllable_serial = 1;
+  %%{
+    write exec;
+  }%%
+}
+
+#undef found_syllable
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
diff --git a/src/hb-ot-shape-complex-use-private.hh b/src/hb-ot-shape-complex-use-private.hh
index e69de29bb..73ecc3bfd 100644
--- a/src/hb-ot-shape-complex-use-private.hh
+++ b/src/hb-ot-shape-complex-use-private.hh
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2015 Mozilla Foundation.
+ * Copyright © 2015 Google, Inc.
+ *
+ * This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ *
+ * Mozilla Author(s): Jonathan Kew
+ * Google Author(s): Behdad Esfahbod
+ */
+
+#ifndef HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+#define HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
+
+#include "hb-private.hh"
+
+
+#include "hb-ot-shape-complex-private.hh"
+
+/* Return type of hb_use_get_categories(); wide enough for a USE_COMBINE_CATEGORIES() value. */
+#define USE_TABLE_ELEMENT_TYPE uint16_t
+
+/* Categories used in the Universal Shaping Engine spec:
+ * https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
+ */
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum use_category_t {
+  USE_O     = 0,  /* OTHER */
+
+  USE_B     = 1,  /* BASE */
+  USE_IV    = 2,  /* BASE_VOWEL */
+  USE_IND   = 3,  /* BASE_IND */
+  USE_N     = 4,  /* BASE_NUM */
+  USE_GB    = 5,  /* BASE_OTHER */
+  USE_CGJ   = 6,  /* CGJ */
+//  USE_F   = 7,  /* CONS_FINAL */
+  USE_FM    = 8,  /* CONS_FINAL_MOD */
+//  USE_M   = 9,  /* CONS_MED */
+//  USE_CM  = 10, /* CONS_MOD */
+  USE_SUB   = 11, /* CONS_SUB */
+  USE_H     = 12, /* HALANT */
+  USE_HN    = 13, /* HALANT_NUM */
+  USE_ZWNJ  = 14, /* Zero width non-joiner */
+  USE_ZWJ   = 15, /* Zero width joiner */
+  USE_WJ    = 16, /* Word joiner */
+  USE_Rsv   = 17, /* Reserved characters */
+  USE_R     = 18, /* REPHA */
+  USE_S     = 19, /* SYM */
+//  USE_SM  = 20, /* SYM_MOD */
+  USE_VS    = 21, /* VARIATION_SELECTOR */
+//  USE_V   = 22, /* VOWEL */
+//  USE_VM  = 23, /* VOWEL_MOD */
+
+  USE_FAbv  = 24, /* CONS_FINAL_ABOVE */
+  USE_FBlw  = 25, /* CONS_FINAL_BELOW */
+  USE_FPst  = 26, /* CONS_FINAL_POST */
+  USE_MAbv  = 27, /* CONS_MED_ABOVE */
+  USE_MBlw  = 28, /* CONS_MED_BELOW */
+  USE_MPst  = 29, /* CONS_MED_POST */
+  USE_MPre  = 30, /* CONS_MED_PRE */
+  USE_CMAbv = 31, /* CONS_MOD_ABOVE */
+  USE_CMBlw = 32, /* CONS_MOD_BELOW */
+  USE_VAbv  = 33, /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
+  USE_VBlw  = 34, /* VOWEL_BELOW / VOWEL_BELOW_POST */
+  USE_VPst  = 35, /* VOWEL_POST UIPC = Right */
+  USE_VPre  = 36, /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
+  USE_VMAbv = 37, /* VOWEL_MOD_ABOVE */
+  USE_VMBlw = 38, /* VOWEL_MOD_BELOW */
+  USE_VMPst = 39, /* VOWEL_MOD_POST */
+  USE_VMPre = 40, /* VOWEL_MOD_PRE */
+  USE_SMAbv = 41, /* SYM_MOD_ABOVE */
+  USE_SMBlw = 42  /* SYM_MOD_BELOW */
+};
+
+/* Categories used in IndicSyllabicCategory.txt from UCD. */
+enum indic_syllabic_category_t
+{
+  INDIC_SYLLABIC_CATEGORY_AVAGRAHA,
+  INDIC_SYLLABIC_CATEGORY_BINDU,
+  INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER,
+  INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA,
+  INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER,
+  INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK,
+  INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER,
+  INDIC_SYLLABIC_CATEGORY_JOINER,
+  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER,
+  INDIC_SYLLABIC_CATEGORY_NON_JOINER,
+  INDIC_SYLLABIC_CATEGORY_NUKTA,
+  INDIC_SYLLABIC_CATEGORY_NUMBER,
+  INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER,
+  INDIC_SYLLABIC_CATEGORY_OTHER,
+  INDIC_SYLLABIC_CATEGORY_PURE_KILLER,
+  INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER,
+  INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER,
+  INDIC_SYLLABIC_CATEGORY_TONE_LETTER,
+  INDIC_SYLLABIC_CATEGORY_TONE_MARK,
+  INDIC_SYLLABIC_CATEGORY_VIRAMA,
+  INDIC_SYLLABIC_CATEGORY_VISARGA,
+  INDIC_SYLLABIC_CATEGORY_VOWEL,
+  INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT,
+  INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT
+};
+/* NOTE(review): the type below is named "matra" but its enumerators say POSITIONAL -- confirm the intended name. */
+/* Categories used in IndicPositionalCategory.txt from UCD */
+enum indic_matra_category_t {
+  INDIC_POSITIONAL_CATEGORY_BOTTOM,
+  INDIC_POSITIONAL_CATEGORY_BOTTOM_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_LEFT,
+  INDIC_POSITIONAL_CATEGORY_LEFT_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_NOT_APPLICABLE,
+  INDIC_POSITIONAL_CATEGORY_OVERSTRUCK,
+  INDIC_POSITIONAL_CATEGORY_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_TOP_AND_RIGHT,
+  INDIC_POSITIONAL_CATEGORY_VISUAL_ORDER_LEFT
+};
+
+/* Pack S into the low byte and P into the high byte; both must be < 255 (checked at compile time).
+ * Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation
+ * because gcc fails to optimize the latter and fills the table in at runtime. */
+#define USE_COMBINE_CATEGORIES(S,P) \
+  (ASSERT_STATIC_EXPR_ZERO ((S) < 255 && (P) < 255) + (((P) << 8) | (S)))
+/* Per-codepoint category lookup (declaration only; the definition is not in this header). */
+HB_INTERNAL USE_TABLE_ELEMENT_TYPE
+hb_use_get_categories (hb_codepoint_t u);
+
+#endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */