[USE] Put a Ragel machine together
Grammar from the spec!
This commit is contained in:
parent
fd74b939b1
commit
e0eabd7f67
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Copyright © 2015 Mozilla Foundation.
|
||||
* Copyright © 2015 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*
|
||||
* Mozilla Author(s): Jonathan Kew
|
||||
* Google Author(s): Behdad Esfahbod
|
||||
*/
|
||||
|
||||
#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
|
||||
#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH
|
||||
|
||||
#include "hb-private.hh"
|
||||
|
||||
%%{
  # Names the machine and emits its static tables at this point of the file.
  machine use_syllable_machine;
  # Input keys are category values; every value defined below fits in a byte.
  alphtype unsigned char;
  write data;
}%%
|
||||
|
||||
%%{
|
||||
|
||||
# Same order as enum use_category_t.  Not sure how to avoid duplication.

O	= 0; # OTHER

B	= 1; # BASE
IV	= 2; # BASE_VOWEL
IND	= 3; # BASE_IND
N	= 4; # BASE_NUM
GB	= 5; # BASE_OTHER
CGJ	= 6; # CGJ
#F	= 7; # CONS_FINAL
FM	= 8; # CONS_FINAL_MOD
#M	= 9; # CONS_MED
#CM	= 10; # CONS_MOD
SUB	= 11; # CONS_SUB
H	= 12; # HALANT
HN	= 13; # HALANT_NUM
ZWNJ	= 14; # Zero width non-joiner
ZWJ	= 15; # Zero width joiner
WJ	= 16; # Word joiner
Rsv	= 17; # Reserved characters
R	= 18; # REPHA
S	= 19; # SYM
#SM	= 20; # SYM_MOD
VS	= 21; # VARIATION_SELECTOR
#V	= 22; # VOWEL
#VM	= 23; # VOWEL_MOD

FAbv	= 24; # CONS_FINAL_ABOVE
FBlw	= 25; # CONS_FINAL_BELOW
FPst	= 26; # CONS_FINAL_POST
MAbv	= 27; # CONS_MED_ABOVE
MBlw	= 28; # CONS_MED_BELOW
MPst	= 29; # CONS_MED_POST
MPre	= 30; # CONS_MED_PRE
CMAbv	= 31; # CONS_MOD_ABOVE
CMBlw	= 32; # CONS_MOD_BELOW
VAbv	= 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw	= 34; # VOWEL_BELOW / VOWEL_BELOW_POST
VPst	= 35; # VOWEL_POST	UIPC = Right
VPre	= 36; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv	= 37; # VOWEL_MOD_ABOVE
VMBlw	= 38; # VOWEL_MOD_BELOW
VMPst	= 39; # VOWEL_MOD_POST
VMPre	= 40; # VOWEL_MOD_PRE
SMAbv	= 41; # SYM_MOD_ABOVE
SMBlw	= 42; # SYM_MOD_BELOW
|
||||
|
||||
|
||||
# Optional tails shared by the cluster patterns below.  Each is a fixed
# sequence of category runs, so marks only match when they occur in this order.
consonant_modifiers = CMAbv* CMBlw* ((H B | SUB) VS? CMAbv? CMBlw*)*;
medial_consonants = MPre? MAbv? MBlw? MPst?;
dependent_vowels = VPre* VAbv* VBlw* VPst*;
vowel_modifiers = VMPre* VMAbv* VMBlw* VMPst*;
final_consonants = FAbv* FBlw* FPst* FM?;

# A base or independent vowel (optionally preceded by R) whose consonant
# modifiers are followed by a closing halant.
virama_terminated_cluster =
	R? (B | GB | IV) VS?
	consonant_modifiers
	H
;
# A consonant base followed by every optional tail, in order.
consonant_cluster =
	R? (B | GB) VS?
	consonant_modifiers
	medial_consonants
	dependent_vowels
	vowel_modifiers
	final_consonants
;
# Same as consonant_cluster but based on an independent vowel; note that it
# takes no dependent_vowels.
vowel_cluster =
	R? (IV) VS?
	consonant_modifiers
	medial_consonants
	vowel_modifiers
	final_consonants
;
|
||||
|
||||
# Digits are joined by the number joiner HN (HALANT_NUM), not by the ordinary
# halant H: a sequence of N linked by HN, optionally left open by a final HN.
number_joiner_terminated_cluster = N VS? (HN N VS?)* HN;
numeral_cluster = N VS? (HN N VS?)*;
# A symbol followed by its above and then below modifiers.
symbol_cluster = S VS? SMAbv* SMBlw*;
# Categories that always form a one-glyph cluster (plus an optional VS).
independent_cluster = (IND | O | Rsv | WJ) VS?;
|
||||
|
||||
# Scanner over the whole buffer: the longest matching pattern wins, and for
# equal lengths the pattern listed first wins.
# NOTE(review): input that cannot start any pattern here (for example a
# leading H, or CGJ / ZWNJ / ZWJ, which no rule references) has no matching
# alternative -- confirm how such glyphs are expected to be handled.
main := |*
	independent_cluster			=> { found_syllable (independent_cluster); };
	virama_terminated_cluster		=> { found_syllable (virama_terminated_cluster); };
	consonant_cluster			=> { found_syllable (consonant_cluster); };
	vowel_cluster				=> { found_syllable (vowel_cluster); };
	number_joiner_terminated_cluster	=> { found_syllable (number_joiner_terminated_cluster); };
	numeral_cluster				=> { found_syllable (numeral_cluster); };
	symbol_cluster				=> { found_syllable (symbol_cluster); };
*|;
|
||||
|
||||
|
||||
}%%
|
||||
|
||||
/* Scanner action helper: tags glyphs in [last, p+1) as one syllable.
 *
 * Each glyph's syllable() receives (syllable_serial << 4) | syllable_type,
 * then `last` moves past the match and the serial advances, wrapping from
 * 16 back to 1 so that it cycles through 1..15 and is never 0.
 *
 * Relies on `info`, `p`, `last` and `syllable_serial` being in scope at the
 * expansion site.  NOTE(review): assumes syllable_type fits in the low four
 * bits -- confirm against the syllable type enum.
 * The `if (0)` fprintf is a compiled-out debugging aid. */
#define found_syllable(syllable_type) \
  HB_STMT_START { \
    if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
    for (unsigned int i = last; i < p+1; i++) \
      info[i].syllable() = (syllable_serial << 4) | (syllable_type); \
    last = p+1; \
    syllable_serial++; \
    if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
  } HB_STMT_END
|
||||
|
||||
static void
|
||||
find_syllables (hb_buffer_t *buffer)
|
||||
{
|
||||
unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
|
||||
int cs;
|
||||
hb_glyph_info_t *info = buffer->info;
|
||||
%%{
|
||||
write init;
|
||||
getkey info[p].use_category();
|
||||
}%%
|
||||
|
||||
p = 0;
|
||||
pe = eof = buffer->len;
|
||||
|
||||
unsigned int last = 0;
|
||||
unsigned int syllable_serial = 1;
|
||||
%%{
|
||||
write exec;
|
||||
}%%
|
||||
}
|
||||
|
||||
#undef found_syllable
|
||||
|
||||
#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* Copyright © 2015 Mozilla Foundation.
|
||||
* Copyright © 2015 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*
|
||||
* Mozilla Author(s): Jonathan Kew
|
||||
* Google Author(s): Behdad Esfahbod
|
||||
*/
|
||||
|
||||
#ifndef HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
|
||||
#define HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH
|
||||
|
||||
#include "hb-private.hh"
|
||||
|
||||
|
||||
#include "hb-ot-shape-complex-private.hh"
|
||||
|
||||
|
||||
/* Integer type returned by hb_use_get_categories(). */
#define USE_TABLE_ELEMENT_TYPE uint16_t
|
||||
|
||||
/* Categories used in the Universal Shaping Engine spec:
|
||||
* https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm
|
||||
*/
|
||||
/* Note: This enum is duplicated in the -machine.rl source file.
|
||||
* Not sure how to avoid duplication. */
|
||||
enum use_category_t {
  USE_O		= 0,	/* OTHER */

  USE_B		= 1,	/* BASE */
  USE_IV	= 2,	/* BASE_VOWEL */
  USE_IND	= 3,	/* BASE_IND */
  USE_N		= 4,	/* BASE_NUM */
  USE_GB	= 5,	/* BASE_OTHER */
  USE_CGJ	= 6,	/* CGJ */
//  USE_F		= 7,	/* CONS_FINAL */
  USE_FM	= 8,	/* CONS_FINAL_MOD */
//  USE_M		= 9,	/* CONS_MED */
//  USE_CM	= 10,	/* CONS_MOD */
  USE_SUB	= 11,	/* CONS_SUB */
  USE_H		= 12,	/* HALANT */
  USE_HN	= 13,	/* HALANT_NUM */
  USE_ZWNJ	= 14,	/* Zero width non-joiner */
  USE_ZWJ	= 15,	/* Zero width joiner */
  USE_WJ	= 16,	/* Word joiner */
  USE_Rsv	= 17,	/* Reserved characters */
  USE_R		= 18,	/* REPHA */
  USE_S		= 19,	/* SYM */
//  USE_SM	= 20,	/* SYM_MOD */
  USE_VS	= 21,	/* VARIATION_SELECTOR */
//  USE_V		= 22,	/* VOWEL */
//  USE_VM	= 23,	/* VOWEL_MOD */

  USE_FAbv	= 24,	/* CONS_FINAL_ABOVE */
  USE_FBlw	= 25,	/* CONS_FINAL_BELOW */
  USE_FPst	= 26,	/* CONS_FINAL_POST */
  USE_MAbv	= 27,	/* CONS_MED_ABOVE */
  USE_MBlw	= 28,	/* CONS_MED_BELOW */
  USE_MPst	= 29,	/* CONS_MED_POST */
  USE_MPre	= 30,	/* CONS_MED_PRE */
  USE_CMAbv	= 31,	/* CONS_MOD_ABOVE */
  USE_CMBlw	= 32,	/* CONS_MOD_BELOW */
  USE_VAbv	= 33,	/* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
  USE_VBlw	= 34,	/* VOWEL_BELOW / VOWEL_BELOW_POST */
  USE_VPst	= 35,	/* VOWEL_POST	UIPC = Right */
  USE_VPre	= 36,	/* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
  USE_VMAbv	= 37,	/* VOWEL_MOD_ABOVE */
  USE_VMBlw	= 38,	/* VOWEL_MOD_BELOW */
  USE_VMPst	= 39,	/* VOWEL_MOD_POST */
  USE_VMPre	= 40,	/* VOWEL_MOD_PRE */
  USE_SMAbv	= 41,	/* SYM_MOD_ABOVE */
  USE_SMBlw	= 42	/* SYM_MOD_BELOW */
};
|
||||
|
||||
/* Categories used in IndicSyllabicCategory.txt from UCD. */
|
||||
/* Enumerators are numbered implicitly from 0 in the order listed, so
 * inserting or reordering entries changes their values.
 * No comma follows the last enumerator: C++98 does not allow one. */
enum indic_syllabic_category_t
{
  INDIC_SYLLABIC_CATEGORY_AVAGRAHA,
  INDIC_SYLLABIC_CATEGORY_BINDU,
  INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER,
  INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK,
  INDIC_SYLLABIC_CATEGORY_CONSONANT,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA,
  INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER,
  INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK,
  INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER,
  INDIC_SYLLABIC_CATEGORY_JOINER,
  INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER,
  INDIC_SYLLABIC_CATEGORY_NON_JOINER,
  INDIC_SYLLABIC_CATEGORY_NUKTA,
  INDIC_SYLLABIC_CATEGORY_NUMBER,
  INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER,
  INDIC_SYLLABIC_CATEGORY_OTHER,
  INDIC_SYLLABIC_CATEGORY_PURE_KILLER,
  INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER,
  INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER,
  INDIC_SYLLABIC_CATEGORY_TONE_LETTER,
  INDIC_SYLLABIC_CATEGORY_TONE_MARK,
  INDIC_SYLLABIC_CATEGORY_VIRAMA,
  INDIC_SYLLABIC_CATEGORY_VISARGA,
  INDIC_SYLLABIC_CATEGORY_VOWEL,
  INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT,
  INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT
};
|
||||
|
||||
/* Categories used in IndicPositionalCategory.txt from UCD */
|
||||
/* Note: the type is named indic_matra_category_t while its enumerators carry
 * the INDIC_POSITIONAL_CATEGORY_ prefix.  Values are implicit, counting from 0
 * in the order listed.  No comma follows the last enumerator: C++98 does not
 * allow one. */
enum indic_matra_category_t {
  INDIC_POSITIONAL_CATEGORY_BOTTOM,
  INDIC_POSITIONAL_CATEGORY_BOTTOM_AND_RIGHT,
  INDIC_POSITIONAL_CATEGORY_LEFT,
  INDIC_POSITIONAL_CATEGORY_LEFT_AND_RIGHT,
  INDIC_POSITIONAL_CATEGORY_NOT_APPLICABLE,
  INDIC_POSITIONAL_CATEGORY_OVERSTRUCK,
  INDIC_POSITIONAL_CATEGORY_RIGHT,
  INDIC_POSITIONAL_CATEGORY_TOP,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_LEFT_AND_RIGHT,
  INDIC_POSITIONAL_CATEGORY_TOP_AND_RIGHT,
  INDIC_POSITIONAL_CATEGORY_VISUAL_ORDER_LEFT
};
|
||||
|
||||
/* Note: We use ASSERT_STATIC_EXPR_ZERO() instead of ASSERT_STATIC_EXPR() and the comma operation
|
||||
* because gcc fails to optimize the latter and fills the table in at runtime. */
|
||||
/* Packs S into the low byte and P into the next byte of one value, after a
 * compile-time check that both are below 255.  Arguments are parenthesized so
 * that expression arguments (e.g. `A | B`) are checked and shifted as a whole. */
#define USE_COMBINE_CATEGORIES(S,P) \
	(ASSERT_STATIC_EXPR_ZERO ((S) < 255 && (P) < 255) + \
	 (((P) << 8) | (S)))
|
||||
|
||||
/* Looks up the categories of codepoint `u`.
 * NOTE(review): the result is presumably packed as by USE_COMBINE_CATEGORIES
 * -- confirm against the definition, which is not in this header. */
HB_INTERNAL USE_TABLE_ELEMENT_TYPE
hb_use_get_categories (hb_codepoint_t u);
|
||||
|
||||
#endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */
|
Loading…
Reference in New Issue