[indic] Add syllable recognition state machine
Using an incredible tool called Ragel.
This commit is contained in:
parent
422e08dbb8
commit
867361c3ad
|
@ -56,6 +56,7 @@ HBSOURCES += \
|
||||||
hb-ot-shape-complex-arabic.cc \
|
hb-ot-shape-complex-arabic.cc \
|
||||||
hb-ot-shape-complex-arabic-table.hh \
|
hb-ot-shape-complex-arabic-table.hh \
|
||||||
hb-ot-shape-complex-indic.cc \
|
hb-ot-shape-complex-indic.cc \
|
||||||
|
hb-ot-shape-complex-indic-machine.hh \
|
||||||
hb-ot-shape-complex-indic-table.hh \
|
hb-ot-shape-complex-indic-table.hh \
|
||||||
hb-ot-shape-complex-private.hh \
|
hb-ot-shape-complex-private.hh \
|
||||||
hb-ot-shape-private.hh \
|
hb-ot-shape-private.hh \
|
||||||
|
@ -121,6 +122,10 @@ GENERATORS = \
|
||||||
|
|
||||||
EXTRA_DIST += $(GENERATORS)
|
EXTRA_DIST += $(GENERATORS)
|
||||||
|
|
||||||
|
BUILT_SOURCES = hb-ot-shape-complex-indic-machine.hh
|
||||||
|
hb-ot-shape-complex-indic-machine.hh: hb-ot-shape-complex-indic-machine.rl
|
||||||
|
$(AM_V_GEN)ragel -e -F1 -o $@ $^
|
||||||
|
|
||||||
noinst_PROGRAMS = main test
|
noinst_PROGRAMS = main test
|
||||||
bin_PROGRAMS =
|
bin_PROGRAMS =
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2011 Google, Inc.
|
||||||
|
*
|
||||||
|
* This is part of HarfBuzz, a text shaping library.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
* license or royalty fees, to use, copy, modify, and distribute this
|
||||||
|
* software and its documentation for any purpose, provided that the
|
||||||
|
* above copyright notice and the following two paragraphs appear in
|
||||||
|
* all copies of this software.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||||
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||||
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||||
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||||
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Google Author(s): Behdad Esfahbod
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
|
||||||
|
#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
|
||||||
|
|
||||||
|
#include "hb-private.hh"
|
||||||
|
|
||||||
|
HB_BEGIN_DECLS
|
||||||
|
|
||||||
|
%%{
|
||||||
|
machine indic_syllable_machine;
|
||||||
|
alphtype unsigned char;
|
||||||
|
write data;
|
||||||
|
}%%
|
||||||
|
|
||||||
|
%%{
|
||||||
|
|
||||||
|
X = 0;
|
||||||
|
C = 1;
|
||||||
|
Ra = 2;
|
||||||
|
V = 3;
|
||||||
|
N = 4;
|
||||||
|
H = 5;
|
||||||
|
ZWNJ = 6;
|
||||||
|
ZWJ = 7;
|
||||||
|
M = 8;
|
||||||
|
SM = 9;
|
||||||
|
VD = 10;
|
||||||
|
A = 11;
|
||||||
|
NBSP = 12;
|
||||||
|
|
||||||
|
c = C | Ra;
|
||||||
|
z = ZWJ|ZWNJ;
|
||||||
|
matra_group = M N? H?;
|
||||||
|
syllable_tail = SM? (VD VD?)?;
|
||||||
|
|
||||||
|
action matched_syllable {
|
||||||
|
//fprintf (stderr, "Syll %d\n", p);
|
||||||
|
}
|
||||||
|
|
||||||
|
consonant_syllable = (c.N? (z.H|H.z?))* c.N? A? (H.z? | matra_group*)? syllable_tail %(matched_syllable);
|
||||||
|
vowel_syllable = (Ra H)? V N? (z.H.c | ZWJ.c)? matra_group* syllable_tail;
|
||||||
|
standalone_cluster = (Ra H)? NBSP N? (z? H c)? matra_group* syllable_tail;
|
||||||
|
non_indic = X;
|
||||||
|
|
||||||
|
syllable =
|
||||||
|
consonant_syllable
|
||||||
|
| vowel_syllable
|
||||||
|
| standalone_cluster
|
||||||
|
| non_indic
|
||||||
|
;
|
||||||
|
|
||||||
|
main := syllable**;
|
||||||
|
|
||||||
|
}%%
|
||||||
|
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* Runs the Ragel state machine declared in this file over c->buffer.
 * The input symbol for each position p is that item's indic_category()
 * (see the getkey directive below).  Nothing is returned; any per-syllable
 * work happens in the machine's actions.
 *
 * p, pe, eof and cs are the variables referenced by the code Ragel emits
 * for "write init" / "write exec". */
static void
|
||||||
|
find_syllables (hb_ot_shape_context_t *c)
|
||||||
|
{
|
||||||
|
unsigned int p, pe, eof;
|
||||||
|
int cs;
|
||||||
|
%%{
|
||||||
|
write init;
|
||||||
|
getkey c->buffer->info[p].indic_category();
|
||||||
|
}%%
|
||||||
|
|
||||||
|
/* Scan the whole buffer: start at index 0; end-of-input and EOF are both the buffer length. */
p = 0;
|
||||||
|
pe = eof = c->buffer->len;
|
||||||
|
|
||||||
|
%%{
|
||||||
|
write exec;
|
||||||
|
}%%
|
||||||
|
}
|
||||||
|
|
||||||
|
HB_END_DECLS
|
||||||
|
|
||||||
|
#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */
|
|
@ -30,16 +30,18 @@ HB_BEGIN_DECLS
|
||||||
|
|
||||||
|
|
||||||
/* buffer var allocations */
|
/* buffer var allocations */
|
||||||
#define indic_categories() var2.u32 /* indic shaping action */
|
#define indic_category() var2.u8[0] /* indic_category_t */
|
||||||
|
#define indic_position() var2.u8[1] /* indic_matra_category_t */
|
||||||
|
|
||||||
#define INDIC_TABLE_ELEMENT_TYPE uint8_t
|
#define INDIC_TABLE_ELEMENT_TYPE uint8_t
|
||||||
|
|
||||||
/* Categories used in the OpenType spec:
|
/* Categories used in the OpenType spec:
|
||||||
* https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
|
* https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
|
||||||
*/
|
*/
|
||||||
enum {
|
enum indic_category_t {
|
||||||
OT_X = 0,
|
OT_X = 0,
|
||||||
OT_C,
|
OT_C,
|
||||||
|
OT_Ra,
|
||||||
OT_V,
|
OT_V,
|
||||||
OT_N,
|
OT_N,
|
||||||
OT_H,
|
OT_H,
|
||||||
|
@ -147,9 +149,8 @@ static const hb_tag_t indic_other_features[] =
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED)
|
_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED)
|
||||||
{
|
{
|
||||||
for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++)
|
for (unsigned int i = 0; i < ARRAY_LENGTH (indic_basic_features); i++)
|
||||||
planner->map.add_bool_feature (indic_basic_features[i], false);
|
planner->map.add_bool_feature (indic_basic_features[i], false);
|
||||||
|
@ -158,6 +159,11 @@ _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, con
|
||||||
planner->map.add_bool_feature (indic_other_features[i], true);
|
planner->map.add_bool_feature (indic_other_features[i], true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#include "hb-ot-shape-complex-indic-machine.hh"
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
|
_hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
|
||||||
{
|
{
|
||||||
|
@ -165,11 +171,14 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
|
||||||
|
|
||||||
for (unsigned int i = 0; i < count; i++)
|
for (unsigned int i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
unsigned int this_type = get_indic_categories (c->buffer->info[i].codepoint);
|
unsigned int type = get_indic_categories (c->buffer->info[i].codepoint);
|
||||||
|
|
||||||
c->buffer->info[i].indic_categories() = this_type;
|
c->buffer->info[i].indic_category() = type & 0x0F;
|
||||||
|
c->buffer->info[i].indic_position() = type >> 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
find_syllables (c);
|
||||||
|
|
||||||
hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
|
hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
|
||||||
unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
|
unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
|
||||||
for (unsigned int i = 0; i < num_masks; i++)
|
for (unsigned int i = 0; i < num_masks; i++)
|
||||||
|
|
Loading…
Reference in New Issue