From 867361c3ad39629a8d5b7dc48d558a1c19e37d43 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Fri, 17 Jun 2011 18:35:46 -0400 Subject: [PATCH] [indic] Add syllable recognition state machine Using an incredible tool called Ragel. --- src/Makefile.am | 5 ++ src/hb-ot-shape-complex-indic-machine.rl | 105 +++++++++++++++++++++++ src/hb-ot-shape-complex-indic.cc | 21 +++-- 3 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 src/hb-ot-shape-complex-indic-machine.rl diff --git a/src/Makefile.am b/src/Makefile.am index 8701d94dc..e58bb50a3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -56,6 +56,7 @@ HBSOURCES += \ hb-ot-shape-complex-arabic.cc \ hb-ot-shape-complex-arabic-table.hh \ hb-ot-shape-complex-indic.cc \ + hb-ot-shape-complex-indic-machine.hh \ hb-ot-shape-complex-indic-table.hh \ hb-ot-shape-complex-private.hh \ hb-ot-shape-private.hh \ @@ -121,6 +122,10 @@ GENERATORS = \ EXTRA_DIST += $(GENERATORS) +BUILT_SOURCES = hb-ot-shape-complex-indic-machine.hh +hb-ot-shape-complex-indic-machine.hh: hb-ot-shape-complex-indic-machine.rl + $(AM_V_GEN)ragel -e -F1 -o $@ $^ + noinst_PROGRAMS = main test bin_PROGRAMS = diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl new file mode 100644 index 000000000..ba0b007e6 --- /dev/null +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -0,0 +1,105 @@ +/* + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. 
+ * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH + +#include "hb-private.hh" + +HB_BEGIN_DECLS + +%%{ + machine indic_syllable_machine; + alphtype unsigned char; + write data; +}%% + +%%{ + +X = 0; +C = 1; +Ra = 2; +V = 3; +N = 4; +H = 5; +ZWNJ = 6; +ZWJ = 7; +M = 8; +SM = 9; +VD = 10; +A = 11; +NBSP = 12; + +c = C | Ra; +z = ZWJ|ZWNJ; +matra_group = M N? H?; +syllable_tail = SM? (VD VD?)?; + +action matched_syllable { + //fprintf (stderr, "Syll %d\n", p); +} + +consonant_syllable = (c.N? (z.H|H.z?))* c.N? A? (H.z? | matra_group*)? syllable_tail %(matched_syllable); +vowel_syllable = (Ra H)? V N? (z.H.c | ZWJ.c)? matra_group* syllable_tail; +standalone_cluster = (Ra H)? NBSP N? (z? H c)? 
+ matra_group* syllable_tail; +non_indic = X; + +syllable = + consonant_syllable + | vowel_syllable + | standalone_cluster + | non_indic + ; + +main := syllable**; + +}%% + + +#include <stdio.h> +#include <string.h> /* NOTE(review): header names were stripped in extraction; reconstructed, confirm against upstream */ + +static void +find_syllables (hb_ot_shape_context_t *c) /* Runs indic_syllable_machine over c->buffer->info[0 .. c->buffer->len), using each element's indic_category() as the input symbol. */ +{ + unsigned int p, pe, eof; + int cs; + %%{ + write init; + getkey c->buffer->info[p].indic_category(); + }%% + + p = 0; + pe = eof = c->buffer->len; + + %%{ + write exec; + }%% +} + +HB_END_DECLS + +#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */ diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index c79d0fc6f..c52c5f73b 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -30,16 +30,18 @@ HB_BEGIN_DECLS /* buffer var allocations */ -#define indic_categories() var2.u32 /* indic shaping action */ +#define indic_category() var2.u8[0] /* indic_category_t */ +#define indic_position() var2.u8[1] /* indic_matra_category_t */ #define INDIC_TABLE_ELEMENT_TYPE uint8_t /* Cateories used in the OpenType spec: * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */ -enum { +enum indic_category_t { OT_X = 0, OT_C, + OT_Ra, OT_V, OT_N, OT_H, @@ -147,9 +149,8 @@ static const hb_tag_t indic_other_features[] = }; - void -_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED) +_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED) { for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++) planner->map.add_bool_feature (indic_basic_features[i], false); @@ -158,6 +159,11 @@ _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, con planner->map.add_bool_feature (indic_other_features[i], true); } + + +#include "hb-ot-shape-complex-indic-machine.hh" + + void _hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c) { @@ -165,11 +171,14 @@ 
_hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c) for (unsigned int i = 0; i < count; i++) { - unsigned int this_type = get_indic_categories (c->buffer->info[i].codepoint); + unsigned int type = get_indic_categories (c->buffer->info[i].codepoint); - c->buffer->info[i].indic_categories() = this_type; + c->buffer->info[i].indic_category() = type & 0x0F; + c->buffer->info[i].indic_position() = type >> 4; } + find_syllables (c); + hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); for (unsigned int i = 0; i < num_masks; i++)