[USE] Put a Ragel machine together

Grammar from the spec!
This commit is contained in:
Behdad Esfahbod 2015-07-20 13:30:51 +01:00
parent fd74b939b1
commit e0eabd7f67
2 changed files with 329 additions and 0 deletions

View File

@ -0,0 +1,169 @@
/*
* Copyright © 2015 Mozilla Foundation.
* Copyright © 2015 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Mozilla Author(s): Jonathan Kew
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
#include "hb-private.hh"
%%{
# Name of the state machine; Ragel uses it to name the data it generates.
machine use_syllable_machine;
# Input symbols are the small category codes defined below (0..42), so an
# unsigned char alphabet is sufficient.
alphtype unsigned char;
# Emit the machine's static data at this point in the file.
write data;
}%%
%%{
# Input alphabet of the machine: one symbol per USE glyph category.
# Same order as enum use_category_t. Not sure how to avoid duplication.
# The entries that are commented out (7, 9, 10, 20, 22, 23) leave their values
# unassigned; by their names they appear to be the generic forms of the
# position-specific categories numbered 24..42 -- TODO confirm against the spec.
O = 0; # OTHER
B = 1; # BASE
IV = 2; # BASE_VOWEL
IND = 3; # BASE_IND
N = 4; # BASE_NUM
GB = 5; # BASE_OTHER
CGJ = 6; # CGJ
#F = 7; # CONS_FINAL
FM = 8; # CONS_FINAL_MOD
#M = 9; # CONS_MED
#CM = 10; # CONS_MOD
SUB = 11; # CONS_SUB
H = 12; # HALANT
HN = 13; # HALANT_NUM
ZWNJ = 14; # Zero width non-joiner
ZWJ = 15; # Zero width joiner
WJ = 16; # Word joiner
Rsv = 17; # Reserved characters
R = 18; # REPHA
S = 19; # SYM
#SM = 20; # SYM_MOD
VS = 21; # VARIATION_SELECTOR
#V = 22; # VOWEL
#VM = 23; # VOWEL_MOD
# Position-specific categories (above / below / post / pre).
FAbv = 24; # CONS_FINAL_ABOVE
FBlw = 25; # CONS_FINAL_BELOW
FPst = 26; # CONS_FINAL_POST
MAbv = 27; # CONS_MED_ABOVE
MBlw = 28; # CONS_MED_BELOW
MPst = 29; # CONS_MED_POST
MPre = 30; # CONS_MED_PRE
CMAbv = 31; # CONS_MOD_ABOVE
CMBlw = 32; # CONS_MOD_BELOW
VAbv = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw = 34; # VOWEL_BELOW / VOWEL_BELOW_POST
VPst = 35; # VOWEL_POST UIPC = Right
VPre = 36; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv = 37; # VOWEL_MOD_ABOVE
VMBlw = 38; # VOWEL_MOD_BELOW
VMPst = 39; # VOWEL_MOD_POST
VMPre = 40; # VOWEL_MOD_PRE
SMAbv = 41; # SYM_MOD_ABOVE
SMBlw = 42; # SYM_MOD_BELOW
# ---- Shared sub-expressions -------------------------------------------------
# Each one is an ordered run of optional marks that may follow a base.

# Consonant modifiers, then any number of stacked consonants: either
# halant + base or a subjoined consonant, each with its own optional
# variation selector and modifiers.
consonant_modifiers = CMAbv* CMBlw* ((H B | SUB) VS? CMAbv? CMBlw*)*;
# At most one medial consonant per position, in pre/above/below/post order.
medial_consonants = MPre? MAbv? MBlw? MPst?;
# Any number of dependent vowels per position, in pre/above/below/post order.
dependent_vowels = VPre* VAbv* VBlw* VPst*;
# Any number of vowel modifiers per position, in pre/above/below/post order.
vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
# Final consonants per position, followed by at most one final modifier.
final_consonants = FAbv* FBlw* FPst* FM?;

# ---- Cluster types ----------------------------------------------------------

# A base (consonant, generic base or independent vowel) whose cluster ends in
# an explicit halant.
virama_terminated_cluster =
	R? (B | GB | IV) VS?
	consonant_modifiers
	H
;

# Consonant-based cluster: optional repha, base, then every class of mark.
consonant_cluster =
	R? (B | GB) VS?
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	final_consonants
;

# Independent-vowel-based cluster.  Same shape as consonant_cluster except
# that it takes no dependent vowels.
vowel_cluster =
	R? (IV) VS?
	consonant_modifiers
	medial_consonants
	vowel_modifiers
	final_consonants
;

# Numerals are joined by the number joiner HN (HALANT_NUM), not by the
# ordinary halant H: a sequence such as N H N must not form one numeral
# cluster.
number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
numeral_cluster = N VS? (HN N VS?)*;

# A symbol followed by its above/below symbol modifiers.
symbol_cluster = S VS? SMAbv* SMBlw*;

# Anything that forms a cluster on its own, with an optional variation selector.
independent_cluster = (IND | O | Rsv | WJ) VS?;
# Scanner entry point.  At each position Ragel picks the alternative that
# yields the longest match; when two alternatives match the same length the
# one listed first wins, so the order below is significant.
# Each action passes the rule name to found_syllable(), which uses it both as
# a C expression and (stringified) as a debug label.
# NOTE(review): every alternative starts with one of IND, O, Rsv, WJ, R, B, GB,
# IV, N or S.  Input that begins a token with any other category (e.g. CGJ,
# ZWNJ, ZWJ, a bare H, or a dependent mark) matches nothing here -- confirm
# that this cannot happen or that a fallback pattern is added.
main := |*
independent_cluster => { found_syllable (independent_cluster); };
virama_terminated_cluster => { found_syllable (virama_terminated_cluster); };
consonant_cluster => { found_syllable (consonant_cluster); };
vowel_cluster => { found_syllable (vowel_cluster); };
number_joiner_terminated_cluster => { found_syllable (number_joiner_terminated_cluster); };
numeral_cluster => { found_syllable (numeral_cluster); };
symbol_cluster => { found_syllable (symbol_cluster); };
*|;
}%%
/* Record one matched syllable.
 *
 * Expands inside a scanner action and relies on these names being in scope at
 * the expansion site: `info' (glyph array), `p' (current scanner position),
 * `last' (index of the first glyph not yet assigned to a syllable) and
 * `syllable_serial'.
 *
 * Every glyph in info[last .. p] gets (syllable_serial << 4) | syllable_type
 * stored through its syllable() accessor; `last' then moves to p+1.  The
 * serial cycles through 1..15: it is reset to 1 on reaching 16, so it never
 * becomes 0 and always fits in four bits.
 * NOTE(review): this layout assumes syllable_type itself fits in the low four
 * bits -- confirm where the syllable type values are defined.
 *
 * The `if (0)' trace is compiled but never executed; flip it to 1 to debug. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %u..%u %s\n", last, p+1, #syllable_type); \
    for (unsigned int i = last; i < p+1; i++) \
      info[i].syllable() = (syllable_serial << 4) | (syllable_type); \
    last = p+1; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
/* Split the glyphs of `buffer' into syllables.
 *
 * Runs the use_syllable_machine scanner over the use_category() of each glyph
 * in buffer->info, from index 0 to buffer->len.  Each token the scanner
 * recognises is recorded on the glyphs through found_syllable(). */
static void
find_syllables (hb_buffer_t *buffer)
{
  hb_glyph_info_t *info = buffer->info;

  /* Variables the Ragel-generated code expects to find in scope.  ts/te/act
   * are scanner bookkeeping that this file never reads directly. */
  int cs;
  unsigned int ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
  unsigned int p, pe, eof;
  %%{
    write init;
    getkey info[p].use_category();
  }%%

  /* Scan the whole buffer; end of data is also end of input. */
  p = 0;
  pe = buffer->len;
  eof = pe;

  /* State read and advanced by found_syllable(). */
  unsigned int syllable_serial = 1;
  unsigned int last = 0;
  %%{
    write exec;
  }%%
}
#undef found_syllable
#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */

View File

@ -0,0 +1,160 @@
/*
* Copyright © 2015 Mozilla Foundation.
* Copyright © 2015 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Mozilla Author(s): Jonathan Kew
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
#define HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
#include "hb-private.hh"
#include "hb-ot-shape-complex-private.hh"
/* Integer type of the value returned by hb_use_get_categories(). */
#define USE_TABLE_ELEMENT_TYPE uint16_t
/* Categories used in the Universal Shaping Engine spec:
* https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
*/
/* USE glyph categories.
 *
 * Note: This enum is duplicated, value for value, in the -machine.rl source
 * file; the two lists have to be kept in sync by hand.
 *
 * Values 7, 9, 10, 20, 22 and 23 are deliberately left unassigned; the names
 * they would carry are listed in place as comments. */
enum use_category_t {
  USE_O      =  0,  /* OTHER */

  USE_B      =  1,  /* BASE */
  USE_IV     =  2,  /* BASE_VOWEL */
  USE_IND    =  3,  /* BASE_IND */
  USE_N      =  4,  /* BASE_NUM */
  USE_GB     =  5,  /* BASE_OTHER */
  USE_CGJ    =  6,  /* CGJ */
  /* USE_F   =  7,     CONS_FINAL (unassigned) */
  USE_FM     =  8,  /* CONS_FINAL_MOD */
  /* USE_M   =  9,     CONS_MED (unassigned) */
  /* USE_CM  = 10,     CONS_MOD (unassigned) */
  USE_SUB    = 11,  /* CONS_SUB */
  USE_H      = 12,  /* HALANT */

  USE_HN     = 13,  /* HALANT_NUM */
  USE_ZWNJ   = 14,  /* Zero width non-joiner */
  USE_ZWJ    = 15,  /* Zero width joiner */
  USE_WJ     = 16,  /* Word joiner */
  USE_Rsv    = 17,  /* Reserved characters */
  USE_R      = 18,  /* REPHA */
  USE_S      = 19,  /* SYM */
  /* USE_SM  = 20,     SYM_MOD (unassigned) */
  USE_VS     = 21,  /* VARIATION_SELECTOR */
  /* USE_V   = 22,     VOWEL (unassigned) */
  /* USE_VM  = 23,     VOWEL_MOD (unassigned) */

  USE_FAbv   = 24,  /* CONS_FINAL_ABOVE */
  USE_FBlw   = 25,  /* CONS_FINAL_BELOW */
  USE_FPst   = 26,  /* CONS_FINAL_POST */
  USE_MAbv   = 27,  /* CONS_MED_ABOVE */
  USE_MBlw   = 28,  /* CONS_MED_BELOW */
  USE_MPst   = 29,  /* CONS_MED_POST */
  USE_MPre   = 30,  /* CONS_MED_PRE */
  USE_CMAbv  = 31,  /* CONS_MOD_ABOVE */
  USE_CMBlw  = 32,  /* CONS_MOD_BELOW */
  USE_VAbv   = 33,  /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
  USE_VBlw   = 34,  /* VOWEL_BELOW / VOWEL_BELOW_POST */
  USE_VPst   = 35,  /* VOWEL_POST UIPC = Right */
  USE_VPre   = 36,  /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
  USE_VMAbv  = 37,  /* VOWEL_MOD_ABOVE */
  USE_VMBlw  = 38,  /* VOWEL_MOD_BELOW */
  USE_VMPst  = 39,  /* VOWEL_MOD_POST */
  USE_VMPre  = 40,  /* VOWEL_MOD_PRE */
  USE_SMAbv  = 41,  /* SYM_MOD_ABOVE */
  USE_SMBlw  = 42   /* SYM_MOD_BELOW */
};
/* Categories used in IndicSyllabicCategory.txt from UCD.
 * Listed alphabetically; the values are the consecutive integers 0..34,
 * written out so that each one can be read off directly. */
enum indic_syllabic_category_t
{
  INDIC_SYLLABIC_CATEGORY_AVAGRAHA                   =  0,
  INDIC_SYLLABIC_CATEGORY_BINDU                      =  1,
  INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER      =  2,
  INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK          =  3,
  INDIC_SYLLABIC_CATEGORY_CONSONANT                  =  4,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD             =  5,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL            =  6,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER      =  7,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER           =  8,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL           =  9,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER      = 10,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA  = 11,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED         = 12,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED        = 13,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = 14,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER     = 15,
  INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK            = 16,
  INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER          = 17,
  INDIC_SYLLABIC_CATEGORY_JOINER                     = 18,
  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER           = 19,
  INDIC_SYLLABIC_CATEGORY_NON_JOINER                 = 20,
  INDIC_SYLLABIC_CATEGORY_NUKTA                      = 21,
  INDIC_SYLLABIC_CATEGORY_NUMBER                     = 22,
  INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER              = 23,
  INDIC_SYLLABIC_CATEGORY_OTHER                      = 24,
  INDIC_SYLLABIC_CATEGORY_PURE_KILLER                = 25,
  INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER           = 26,
  INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER          = 27,
  INDIC_SYLLABIC_CATEGORY_TONE_LETTER                = 28,
  INDIC_SYLLABIC_CATEGORY_TONE_MARK                  = 29,
  INDIC_SYLLABIC_CATEGORY_VIRAMA                     = 30,
  INDIC_SYLLABIC_CATEGORY_VISARGA                    = 31,
  INDIC_SYLLABIC_CATEGORY_VOWEL                      = 32,
  INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT            = 33,
  INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT          = 34
};
/* Categories used in IndicPositionalCategory.txt from UCD.
 * The enum type is named "matra" while its enumerators use the
 * INDIC_POSITIONAL_CATEGORY_ prefix; both names are kept as they are.
 * Listed alphabetically; the values are the consecutive integers 0..13,
 * written out so that each one can be read off directly. */
enum indic_matra_category_t {
  INDIC_POSITIONAL_CATEGORY_BOTTOM                   =  0,
  INDIC_POSITIONAL_CATEGORY_BOTTOM_AND_RIGHT         =  1,
  INDIC_POSITIONAL_CATEGORY_LEFT                     =  2,
  INDIC_POSITIONAL_CATEGORY_LEFT_AND_RIGHT           =  3,
  INDIC_POSITIONAL_CATEGORY_NOT_APPLICABLE           =  4,
  INDIC_POSITIONAL_CATEGORY_OVERSTRUCK               =  5,
  INDIC_POSITIONAL_CATEGORY_RIGHT                    =  6,
  INDIC_POSITIONAL_CATEGORY_TOP                      =  7,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM           =  8,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT =  9,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT             = 10,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT_AND_RIGHT   = 11,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_RIGHT            = 12,
  INDIC_POSITIONAL_CATEGORY_VISUAL_ORDER_LEFT        = 13
};
/* Pack two category values into a single integer: P shifted left by eight
 * bits, OR-ed with S in the low byte.  Both S and P must be below 255; this is
 * checked at compile time.  The arguments are parenthesized in the expansion
 * so that compound expressions (e.g. `A | B') are packed as a whole.
 *
 * Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation
 * because gcc fails to optimize the latter and fills the table in at runtime. */
#define USE_COMBINE_CATEGORIES(S,P) \
	(ASSERT_STATIC_EXPR_ZERO ((S) < 255 && (P) < 255) + \
	 (((P) << 8) | (S)))
/* Returns a USE_TABLE_ELEMENT_TYPE value for the code point `u'.
 * NOTE(review): the definition is not part of this header; presumably the
 * result is packed the way USE_COMBINE_CATEGORIES() packs its arguments --
 * confirm against the implementation. */
HB_INTERNAL USE_TABLE_ELEMENT_TYPE
hb_use_get_categories (hb_codepoint_t u);
#endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */