[indic] Add syllable recognition state machine
Using an incredible tool called Ragel.
This commit is contained in:
parent
422e08dbb8
commit
867361c3ad
|
@ -56,6 +56,7 @@ HBSOURCES += \
|
|||
hb-ot-shape-complex-arabic.cc \
|
||||
hb-ot-shape-complex-arabic-table.hh \
|
||||
hb-ot-shape-complex-indic.cc \
|
||||
hb-ot-shape-complex-indic-machine.hh \
|
||||
hb-ot-shape-complex-indic-table.hh \
|
||||
hb-ot-shape-complex-private.hh \
|
||||
hb-ot-shape-private.hh \
|
||||
|
@ -121,6 +122,10 @@ GENERATORS = \
|
|||
|
||||
EXTRA_DIST += $(GENERATORS)
|
||||
|
||||
BUILT_SOURCES = hb-ot-shape-complex-indic-machine.hh
|
||||
hb-ot-shape-complex-indic-machine.hh: hb-ot-shape-complex-indic-machine.rl
|
||||
$(AM_V_GEN)ragel -e -F1 -o $@ $^
|
||||
|
||||
noinst_PROGRAMS = main test
|
||||
bin_PROGRAMS =
|
||||
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Copyright © 2011 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*
|
||||
* Google Author(s): Behdad Esfahbod
|
||||
*/
|
||||
|
||||
#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
|
||||
#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH
|
||||
|
||||
#include "hb-private.hh"
|
||||
|
||||
HB_BEGIN_DECLS
|
||||
|
||||
/*
 * Ragel specification of the state machine named indic_syllable_machine.
 *
 * The first section declares the machine, sets its alphabet type to
 * unsigned char, and asks Ragel to emit the machine's data (write data).
 *
 * The second section defines the input symbols X..NBSP as the integers
 * 0..12 and builds the grammar from them.  A "syllable" is one of
 * consonant_syllable, vowel_syllable, standalone_cluster or non_indic, and
 * the main machine is syllable** (Ragel's longest-match Kleene star).
 *
 * NOTE(review): the symbol values presumably have to stay in sync with the
 * indic_category_t enum that find_syllables() reads through
 * indic_category() -- confirm against the enum definition.
 *
 * The matched_syllable action is attached with the leaving-action operator
 * (%) at the end of the consonant_syllable expression only; its body is
 * currently a commented-out debug print, so it has no effect.
 */
%%{
|
||||
machine indic_syllable_machine;
|
||||
alphtype unsigned char;
|
||||
write data;
|
||||
}%%
|
||||
|
||||
/* Symbol definitions and syllable grammar for indic_syllable_machine. */
%%{
|
||||
|
||||
X = 0;
|
||||
C = 1;
|
||||
Ra = 2;
|
||||
V = 3;
|
||||
N = 4;
|
||||
H = 5;
|
||||
ZWNJ = 6;
|
||||
ZWJ = 7;
|
||||
M = 8;
|
||||
SM = 9;
|
||||
VD = 10;
|
||||
A = 11;
|
||||
NBSP = 12;
|
||||
|
||||
c = C | Ra;
|
||||
z = ZWJ|ZWNJ;
|
||||
matra_group = M N? H?;
|
||||
syllable_tail = SM? (VD VD?)?;
|
||||
|
||||
action matched_syllable {
|
||||
//fprintf (stderr, "Syll %d\n", p);
|
||||
}
|
||||
|
||||
consonant_syllable = (c.N? (z.H|H.z?))* c.N? A? (H.z? | matra_group*)? syllable_tail %(matched_syllable);
|
||||
vowel_syllable = (Ra H)? V N? (z.H.c | ZWJ.c)? matra_group* syllable_tail;
|
||||
standalone_cluster = (Ra H)? NBSP N? (z? H c)? matra_group* syllable_tail;
|
||||
non_indic = X;
|
||||
|
||||
syllable =
|
||||
consonant_syllable
|
||||
| vowel_syllable
|
||||
| standalone_cluster
|
||||
| non_indic
|
||||
;
|
||||
|
||||
main := syllable**;
|
||||
|
||||
}%%
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Run the indic_syllable_machine state machine over the whole buffer.
 *
 * The input symbol at position p is c->buffer->info[p].indic_category()
 * (see the getkey directive below), and the scan covers positions
 * 0 .. c->buffer->len.
 *
 * The function returns nothing and, as written, produces no visible result:
 * the only action in the grammar (matched_syllable) has its body commented
 * out.
 */
static void
|
||||
find_syllables (hb_ot_shape_context_t *c)
|
||||
{
|
||||
/* p, pe, eof and cs are the variable names the Ragel-generated code expects:
 * current position, end of input, end-of-file position and current state.
 * Here p/pe/eof are indices into c->buffer->info rather than pointers. */
unsigned int p, pe, eof;
|
||||
int cs;
|
||||
%%{
|
||||
write init;
|
||||
getkey c->buffer->info[p].indic_category();
|
||||
}%%
|
||||
|
||||
/* Scan from the first item to the end of the buffer; the end of the data is
 * also the end of input. */
p = 0;
|
||||
pe = eof = c->buffer->len;
|
||||
|
||||
%%{
|
||||
write exec;
|
||||
}%%
|
||||
}
|
||||
|
||||
HB_END_DECLS
|
||||
|
||||
#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */
|
|
@ -30,16 +30,18 @@ HB_BEGIN_DECLS
|
|||
|
||||
|
||||
/* buffer var allocations */
|
||||
#define indic_categories() var2.u32 /* indic shaping action */
|
||||
#define indic_category() var2.u8[0] /* indic_category_t */
|
||||
#define indic_position() var2.u8[1] /* indic_matra_category_t */
|
||||
|
||||
#define INDIC_TABLE_ELEMENT_TYPE uint8_t
|
||||
|
||||
/* Categories used in the OpenType spec:
|
||||
* https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx
|
||||
*/
|
||||
enum {
|
||||
enum indic_category_t {
|
||||
OT_X = 0,
|
||||
OT_C,
|
||||
OT_Ra,
|
||||
OT_V,
|
||||
OT_N,
|
||||
OT_H,
|
||||
|
@ -147,7 +149,6 @@ static const hb_tag_t indic_other_features[] =
|
|||
};
|
||||
|
||||
|
||||
|
||||
void
|
||||
_hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, const hb_segment_properties_t *props HB_UNUSED)
|
||||
{
|
||||
|
@ -158,6 +159,11 @@ _hb_ot_shape_complex_collect_features_indic (hb_ot_shape_planner_t *planner, con
|
|||
planner->map.add_bool_feature (indic_other_features[i], true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include "hb-ot-shape-complex-indic-machine.hh"
|
||||
|
||||
|
||||
void
|
||||
_hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
|
||||
{
|
||||
|
@ -165,11 +171,14 @@ _hb_ot_shape_complex_setup_masks_indic (hb_ot_shape_context_t *c)
|
|||
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
{
|
||||
unsigned int this_type = get_indic_categories (c->buffer->info[i].codepoint);
|
||||
unsigned int type = get_indic_categories (c->buffer->info[i].codepoint);
|
||||
|
||||
c->buffer->info[i].indic_categories() = this_type;
|
||||
c->buffer->info[i].indic_category() = type & 0x0F;
|
||||
c->buffer->info[i].indic_position() = type >> 4;
|
||||
}
|
||||
|
||||
find_syllables (c);
|
||||
|
||||
hb_mask_t mask_array[ARRAY_LENGTH (indic_basic_features)] = {0};
|
||||
unsigned int num_masks = ARRAY_LENGTH (indic_basic_features);
|
||||
for (unsigned int i = 0; i < num_masks; i++)
|
||||
|
|
Loading…
Reference in New Issue