[indic] Add syllable recognition state machine

Using an incredible tool called Ragel.
This commit is contained in:
Behdad Esfahbod 2011-06-17 18:35:46 -04:00
parent 422e08dbb8
commit 867361c3ad
3 changed files with 125 additions and 6 deletions

View File

@ -56,6 +56,7 @@ HBSOURCES += \
hb-ot-shape-complex-arabic.cc \ hb-ot-shape-complex-arabic.cc \
hb-ot-shape-complex-arabic-table.hh \ hb-ot-shape-complex-arabic-table.hh \
hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic.cc \
hb-ot-shape-complex-indic-machine.hh \
hb-ot-shape-complex-indic-table.hh \ hb-ot-shape-complex-indic-table.hh \
hb-ot-shape-complex-private.hh \ hb-ot-shape-complex-private.hh \
hb-ot-shape-private.hh \ hb-ot-shape-private.hh \
@ -121,6 +122,10 @@ GENERATORS = \
EXTRA_DIST += $(GENERATORS) EXTRA_DIST += $(GENERATORS)
# Generate the Indic syllable state-machine header from its Ragel source.
# Ragel flags (per Ragel's usage text): -e minimizes the FSM after every
# operation, -F1 selects the faster flat table-driven code style, and -o names
# the output file.
# NOTE(review): $^ expands to all prerequisites; with the single .rl
# prerequisite listed here it names just that file.
BUILT_SOURCES = hb-ot-shape-complex-indic-machine.hh
hb-ot-shape-complex-indic-machine.hh: hb-ot-shape-complex-indic-machine.rl
$(AM_V_GEN)ragel -e -F1 -o $@ $^
noinst_PROGRAMS = main test noinst_PROGRAMS = main test
bin_PROGRAMS = bin_PROGRAMS =

View File

@ -0,0 +1,105 @@
/*
* Copyright © 2011 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
#include "hb-private.hh"
HB_BEGIN_DECLS
/* Declare the Ragel state machine named indic_syllable_machine, set its
 * alphabet type to unsigned char, and have Ragel emit the machine's static
 * data at this point in the generated header. */
%%{
machine indic_syllable_machine;
alphtype unsigned char;
write data;
}%%
/* Grammar of indic_syllable_machine.
 *
 * The numeric definitions (X = 0 ... NBSP = 12) are the input symbols.  At run
 * time each symbol is read through indic_category() (see the getkey statement
 * in find_syllables), so these values presumably have to stay in sync with
 * enum indic_category_t -- TODO confirm against the enum's full definition.
 *
 * Helper expressions:
 *   c             - C or Ra
 *   z             - ZWJ or ZWNJ
 *   matra_group   - M, optionally followed by N, optionally followed by H
 *   syllable_tail - optional SM, then zero, one or two VD
 *
 * A syllable is one of consonant_syllable, vowel_syllable,
 * standalone_cluster or non_indic (a single X).  main applies Ragel's
 * longest-match Kleene star (**) to syllable.
 *
 * matched_syllable is attached as a leaving action (%) to
 * consonant_syllable only; its body is currently just a commented-out
 * debug print.
 */
%%{
X = 0;
C = 1;
Ra = 2;
V = 3;
N = 4;
H = 5;
ZWNJ = 6;
ZWJ = 7;
M = 8;
SM = 9;
VD = 10;
A = 11;
NBSP = 12;
c = C | Ra;
z = ZWJ|ZWNJ;
matra_group = M N? H?;
syllable_tail = SM? (VD VD?)?;
action matched_syllable {
//fprintf (stderr, "Syll %d\n", p);
}
consonant_syllable = (c.N? (z.H|H.z?))* c.N? A? (H.z? | matra_group*)? syllable_tail %(matched_syllable);
vowel_syllable = (Ra H)? V N? (z.H.c | ZWJ.c)? matra_group* syllable_tail;
standalone_cluster = (Ra H)? NBSP N? (z? H c)? matra_group* syllable_tail;
non_indic = X;
syllable =
consonant_syllable
| vowel_syllable
| standalone_cluster
| non_indic
;
main := syllable**;
}%%
#include <stdio.h>
#include <string.h>
/* Run indic_syllable_machine over c->buffer, starting at index 0 with
 * c->buffer->len as the end of input.
 *
 * The locals p, pe, eof and cs are the variable names Ragel's generated code
 * works with (current position, end of input, end-of-file position, current
 * state).  Because getkey is overridden below, p/pe/eof are used as indices
 * into c->buffer->info rather than as pointers.
 *
 * NOTE(review): the only action in the machine (matched_syllable) has a
 * commented-out body, so as written this call appears to have no observable
 * effect on the buffer -- confirm against the generated code. */
static void
find_syllables (hb_ot_shape_context_t *c)
{
unsigned int p, pe, eof;
int cs;
/* Emit the machine's initialisation code here; getkey makes the machine
 * read each input symbol from the indic_category() of the glyph at index p. */
%%{
write init;
getkey c->buffer->info[p].indic_category();
}%%
/* Scan from the first glyph to the buffer length; eof is set equal to pe. */
p = 0;
pe = eof = c->buffer->len;
/* Emit the state-machine execution code here. */
%%{
write exec;
}%%
}
HB_END_DECLS
#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */

View File

@ -30,16 +30,18 @@ HB_BEGIN_DECLS
/* buffer var allocations */ /* buffer var allocations */
#define indic_categories() var2.u32 /* indic shaping action */ #define indic_category() var2.u8[0] /* indic_category_t */
#define indic_position() var2.u8[1] /* indic_matra_category_t */
#define INDIC_TABLE_ELEMENT_TYPE uint8_t #define INDIC_TABLE_ELEMENT_TYPE uint8_t
/* Categories used in the OpenType spec: /* Categories used in the OpenType spec:
* https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
*/ */
enum { enum indic_category_t {
OT_X = 0, OT_X = 0,
OT_C, OT_C,
OT_Ra,
OT_V, OT_V,
OT_N, OT_N,
OT_H, OT_H,
@ -147,7 +149,6 @@ static const hb_tag_t indic_other_features[] =
}; };
void void
_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED) _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED)
{ {
@ -158,6 +159,11 @@ _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, con
planner->map.add_bool_feature (indic_other_features[i], true); planner->map.add_bool_feature (indic_other_features[i], true);
} }
#include "hb-ot-shape-complex-indic-machine.hh"
void void
_hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c) _hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
{ {
@ -165,11 +171,14 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
{ {
unsigned int this_type = get_indic_categories (c->buffer->info[i].codepoint); unsigned int type = get_indic_categories (c->buffer->info[i].codepoint);
c->buffer->info[i].indic_categories() = this_type; c->buffer->info[i].indic_category() = type & 0x0F;
c->buffer->info[i].indic_position() = type >> 4;
} }
find_syllables (c);
hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0}; hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
unsigned int num_masks = ARRAY_LENGTH (indic_basic_features); unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
for (unsigned int i = 0; i < num_masks; i++) for (unsigned int i = 0; i < num_masks; i++)