Avoid category duplication between C++ and Ragel

This commit is contained in:
David Corbett 2020-10-17 14:37:22 -04:00 committed by Behdad Esfahbod
parent c8c5e52aba
commit 67ea8133d3
17 changed files with 259 additions and 299 deletions

View File

@ -113,6 +113,7 @@ HB_BASE_sources = \
hb-ot-shape-complex-default.cc \ hb-ot-shape-complex-default.cc \
hb-ot-shape-complex-hangul.cc \ hb-ot-shape-complex-hangul.cc \
hb-ot-shape-complex-hebrew.cc \ hb-ot-shape-complex-hebrew.cc \
hb-ot-shape-complex-indic-category.hh \
hb-ot-shape-complex-indic-table.cc \ hb-ot-shape-complex-indic-table.cc \
hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic.cc \
hb-ot-shape-complex-indic.hh \ hb-ot-shape-complex-indic.hh \
@ -122,6 +123,7 @@ HB_BASE_sources = \
hb-ot-shape-complex-myanmar.cc \ hb-ot-shape-complex-myanmar.cc \
hb-ot-shape-complex-myanmar.hh \ hb-ot-shape-complex-myanmar.hh \
hb-ot-shape-complex-thai.cc \ hb-ot-shape-complex-thai.cc \
hb-ot-shape-complex-use-category.hh \
hb-ot-shape-complex-use-table.cc \ hb-ot-shape-complex-use-table.cc \
hb-ot-shape-complex-use.cc \ hb-ot-shape-complex-use.cc \
hb-ot-shape-complex-use.hh \ hb-ot-shape-complex-use.hh \

View File

@ -19,7 +19,7 @@ outdir = os.path.dirname (OUTPUT)
shutil.copy (INPUT, outdir) shutil.copy (INPUT, outdir)
rl = os.path.basename (INPUT) rl = os.path.basename (INPUT)
hh = rl.replace ('.rl', '.hh') hh = rl.replace ('.rl', '.hh')
subprocess.Popen ([ragel, '-e', '-F1', '-o', hh, rl], cwd=outdir).wait () subprocess.Popen ([ragel, '-e', '-F1', '-I', os.path.abspath(CURRENT_SOURCE_DIR), '-o', hh, rl], cwd=outdir).wait ()
# copy it also to src/ # copy it also to src/
shutil.copyfile (os.path.join (outdir, hh), os.path.join (CURRENT_SOURCE_DIR, hh)) shutil.copyfile (os.path.join (outdir, hh), os.path.join (CURRENT_SOURCE_DIR, hh))

View File

@ -0,0 +1,89 @@
/*
* Copyright © 2012 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_INDIC_CATEGORY_HH
#define HB_OT_SHAPE_COMPLEX_INDIC_CATEGORY_HH
/* Categories used in the OpenType specs:
* https://docs.microsoft.com/en-us/typography/script-development/devanagari
* https://docs.microsoft.com/en-us/typography/script-development/khmer
* https://docs.microsoft.com/en-us/typography/script-development/myanmar
*
* This header is the single definition of the category numbers. It is
* #included from C++ and also pulled into the Indic, Khmer and Myanmar
* Ragel machines with `import "hb-ot-shape-complex-indic-category.hh";`.
*
* Every enumerator is therefore spelled DEFINE_OT (NAME = number):
* - the C++ preprocessor pastes the OT_ prefix onto NAME, yielding
*   OT_NAME = number;
* - the unprefixed `NAME = number` text remains inside the parentheses,
*   which is the form the Ragel grammars refer to (C, Ra, ZWJ, ...).
* Ragel's import statement scrapes `name = number` token sequences, so
* values are written as plain integer literals, not as references to
* other enumerators.
*/
/* Prefix the first token of the argument with OT_; the rest ("= number")
* passes through unchanged. */
#define DEFINE_OT(category) OT_##category
enum indic_category_t {
DEFINE_OT (X = 0),
DEFINE_OT (C = 1),
DEFINE_OT (V = 2),
DEFINE_OT (N = 3),
DEFINE_OT (H = 4),
DEFINE_OT (ZWNJ = 5),
DEFINE_OT (ZWJ = 6),
DEFINE_OT (M = 7),
DEFINE_OT (SM = 8),
/* DEFINE_OT (VD = 9), UNUSED; we use OT_A instead. */
DEFINE_OT (A = 10),
DEFINE_OT (PLACEHOLDER = 11),
DEFINE_OT (DOTTEDCIRCLE = 12),
DEFINE_OT (RS = 13), /* Register Shifter, used in Khmer OT spec. */
DEFINE_OT (Coeng = 14), /* Khmer-style Virama. */
DEFINE_OT (Repha = 15), /* Atomically-encoded logical or visual repha. */
DEFINE_OT (Ra = 16),
DEFINE_OT (CM = 17), /* Consonant-Medial. */
DEFINE_OT (Symbol = 18), /* Avagraha, etc that take marks (SM,A,VD). */
DEFINE_OT (CS = 19),
/* Khmer */
DEFINE_OT (Robatic = 20),
DEFINE_OT (Xgroup = 21),
DEFINE_OT (Ygroup = 22),
/* The following are used by Khmer & Myanmar shapers. */
DEFINE_OT (VAbv = 26),
DEFINE_OT (VBlw = 27),
DEFINE_OT (VPre = 28),
DEFINE_OT (VPst = 29),
/* Myanmar
* Several names below reuse numbers already assigned above:
* IV/V = 2, DB/N = 3, VST/SM = 8, GB/PLACEHOLDER = 11, As/Symbol = 18,
* D0/Robatic = 20, MH/Xgroup = 21, MR/Ygroup = 22.
* NOTE(review): presumably harmless because each shaper's grammar only
* uses its own subset of names -- confirm before using two names that
* share a value in the same machine or switch statement. */
DEFINE_OT (IV = 2), /* Independent vowel */
DEFINE_OT (DB = 3), /* Dot below */
DEFINE_OT (VST = 8), /* Visarga and Shan tones */
DEFINE_OT (GB = 11), /* Generic base */
DEFINE_OT (As = 18), /* Asat */
DEFINE_OT (D0 = 20), /* Digit zero */
DEFINE_OT (MH = 21), /* Various consonant medial types */
DEFINE_OT (MR = 22), /* Various consonant medial types */
DEFINE_OT (MW = 23), /* Various consonant medial types */
DEFINE_OT (MY = 24), /* Various consonant medial types */
DEFINE_OT (PT = 25), /* Pwo and other tones */
DEFINE_OT (VS = 30), /* Variation selectors */
DEFINE_OT (P = 31), /* Punctuation */
DEFINE_OT (D = 32), /* Digits except zero */
};
/* The helper is only needed to build the enum above; do not leak it. */
#undef DEFINE_OT
#endif /* HB_OT_SHAPE_COMPLEX_INDIC_CATEGORY_HH */

View File

@ -388,7 +388,7 @@ static const int indic_syllable_machine_en_main = 39;
#line 93 "hb-ot-shape-complex-indic-machine.rl" #line 76 "hb-ot-shape-complex-indic-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -415,7 +415,7 @@ find_syllables_indic (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 113 "hb-ot-shape-complex-indic-machine.rl" #line 96 "hb-ot-shape-complex-indic-machine.rl"
p = 0; p = 0;
@ -460,51 +460,51 @@ _eof_trans:
{te = p+1;} {te = p+1;}
break; break;
case 11: case 11:
#line 89 "hb-ot-shape-complex-indic-machine.rl" #line 72 "hb-ot-shape-complex-indic-machine.rl"
{te = p+1;{ found_syllable (non_indic_cluster); }} {te = p+1;{ found_syllable (non_indic_cluster); }}
break; break;
case 13: case 13:
#line 84 "hb-ot-shape-complex-indic-machine.rl" #line 67 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (consonant_syllable); }} {te = p;p--;{ found_syllable (consonant_syllable); }}
break; break;
case 14: case 14:
#line 85 "hb-ot-shape-complex-indic-machine.rl" #line 68 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (vowel_syllable); }} {te = p;p--;{ found_syllable (vowel_syllable); }}
break; break;
case 17: case 17:
#line 86 "hb-ot-shape-complex-indic-machine.rl" #line 69 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (standalone_cluster); }} {te = p;p--;{ found_syllable (standalone_cluster); }}
break; break;
case 19: case 19:
#line 87 "hb-ot-shape-complex-indic-machine.rl" #line 70 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (symbol_cluster); }} {te = p;p--;{ found_syllable (symbol_cluster); }}
break; break;
case 15: case 15:
#line 88 "hb-ot-shape-complex-indic-machine.rl" #line 71 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }} {te = p;p--;{ found_syllable (broken_cluster); }}
break; break;
case 16: case 16:
#line 89 "hb-ot-shape-complex-indic-machine.rl" #line 72 "hb-ot-shape-complex-indic-machine.rl"
{te = p;p--;{ found_syllable (non_indic_cluster); }} {te = p;p--;{ found_syllable (non_indic_cluster); }}
break; break;
case 1: case 1:
#line 84 "hb-ot-shape-complex-indic-machine.rl" #line 67 "hb-ot-shape-complex-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (consonant_syllable); }} {{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
break; break;
case 3: case 3:
#line 85 "hb-ot-shape-complex-indic-machine.rl" #line 68 "hb-ot-shape-complex-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (vowel_syllable); }} {{p = ((te))-1;}{ found_syllable (vowel_syllable); }}
break; break;
case 7: case 7:
#line 86 "hb-ot-shape-complex-indic-machine.rl" #line 69 "hb-ot-shape-complex-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (standalone_cluster); }} {{p = ((te))-1;}{ found_syllable (standalone_cluster); }}
break; break;
case 8: case 8:
#line 87 "hb-ot-shape-complex-indic-machine.rl" #line 70 "hb-ot-shape-complex-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (symbol_cluster); }} {{p = ((te))-1;}{ found_syllable (symbol_cluster); }}
break; break;
case 4: case 4:
#line 88 "hb-ot-shape-complex-indic-machine.rl" #line 71 "hb-ot-shape-complex-indic-machine.rl"
{{p = ((te))-1;}{ found_syllable (broken_cluster); }} {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
break; break;
case 6: case 6:
@ -525,19 +525,19 @@ _eof_trans:
case 18: case 18:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 84 "hb-ot-shape-complex-indic-machine.rl" #line 67 "hb-ot-shape-complex-indic-machine.rl"
{act = 1;} {act = 1;}
break; break;
case 5: case 5:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 88 "hb-ot-shape-complex-indic-machine.rl" #line 71 "hb-ot-shape-complex-indic-machine.rl"
{act = 5;} {act = 5;}
break; break;
case 12: case 12:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 89 "hb-ot-shape-complex-indic-machine.rl" #line 72 "hb-ot-shape-complex-indic-machine.rl"
{act = 6;} {act = 6;}
break; break;
#line 544 "hb-ot-shape-complex-indic-machine.hh" #line 544 "hb-ot-shape-complex-indic-machine.hh"
@ -565,7 +565,7 @@ _again:
} }
#line 121 "hb-ot-shape-complex-indic-machine.rl" #line 104 "hb-ot-shape-complex-indic-machine.rl"
} }

View File

@ -37,24 +37,7 @@
%%{ %%{
# Same order as enum indic_category_t. Not sure how to avoid duplication. import "hb-ot-shape-complex-indic-category.hh";
C = 1;
V = 2;
N = 3;
H = 4;
ZWNJ = 5;
ZWJ = 6;
M = 7;
SM = 8;
A = 10;
PLACEHOLDER = 11;
DOTTEDCIRCLE = 12;
RS = 13;
Repha = 15;
Ra = 16;
CM = 17;
Symbol= 18;
CS = 19;
c = (C | Ra); # is_consonant c = (C | Ra); # is_consonant
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier

View File

@ -30,6 +30,7 @@
#include "hb.hh" #include "hb.hh"
#include "hb-ot-shape-complex.hh" #include "hb-ot-shape-complex.hh"
#include "hb-ot-shape-complex-indic-category.hh"
/* buffer var allocations */ /* buffer var allocations */
@ -39,41 +40,6 @@
#define INDIC_TABLE_ELEMENT_TYPE uint16_t #define INDIC_TABLE_ELEMENT_TYPE uint16_t
/* Cateories used in the OpenType spec:
* https://docs.microsoft.com/en-us/typography/script-development/devanagari
*/
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
enum indic_category_t {
OT_X = 0,
OT_C = 1,
OT_V = 2,
OT_N = 3,
OT_H = 4,
OT_ZWNJ = 5,
OT_ZWJ = 6,
OT_M = 7,
OT_SM = 8,
/* OT_VD = 9, UNUSED; we use OT_A instead. */
OT_A = 10,
OT_PLACEHOLDER = 11,
OT_DOTTEDCIRCLE = 12,
OT_RS = 13, /* Register Shifter, used in Khmer OT spec. */
OT_Coeng = 14, /* Khmer-style Virama. */
OT_Repha = 15, /* Atomically-encoded logical or visual repha. */
OT_Ra = 16,
OT_CM = 17, /* Consonant-Medial. */
OT_Symbol = 18, /* Avagraha, etc that take marks (SM,A,VD). */
OT_CS = 19,
/* The following are used by Khmer & Myanmar shapers. Defined
* here for them to share. */
OT_VAbv = 26,
OT_VBlw = 27,
OT_VPre = 28,
OT_VPst = 29,
};
#define MEDIAL_FLAGS (FLAG (OT_CM)) #define MEDIAL_FLAGS (FLAG (OT_CM))
/* Note: /* Note:

View File

@ -219,7 +219,7 @@ static const int khmer_syllable_machine_en_main = 20;
#line 80 "hb-ot-shape-complex-khmer-machine.rl" #line 65 "hb-ot-shape-complex-khmer-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -246,7 +246,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 100 "hb-ot-shape-complex-khmer-machine.rl" #line 85 "hb-ot-shape-complex-khmer-machine.rl"
p = 0; p = 0;
@ -291,27 +291,27 @@ _eof_trans:
{te = p+1;} {te = p+1;}
break; break;
case 8: case 8:
#line 76 "hb-ot-shape-complex-khmer-machine.rl" #line 61 "hb-ot-shape-complex-khmer-machine.rl"
{te = p+1;{ found_syllable (non_khmer_cluster); }} {te = p+1;{ found_syllable (non_khmer_cluster); }}
break; break;
case 10: case 10:
#line 74 "hb-ot-shape-complex-khmer-machine.rl" #line 59 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (consonant_syllable); }} {te = p;p--;{ found_syllable (consonant_syllable); }}
break; break;
case 12: case 12:
#line 75 "hb-ot-shape-complex-khmer-machine.rl" #line 60 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }} {te = p;p--;{ found_syllable (broken_cluster); }}
break; break;
case 11: case 11:
#line 76 "hb-ot-shape-complex-khmer-machine.rl" #line 61 "hb-ot-shape-complex-khmer-machine.rl"
{te = p;p--;{ found_syllable (non_khmer_cluster); }} {te = p;p--;{ found_syllable (non_khmer_cluster); }}
break; break;
case 1: case 1:
#line 74 "hb-ot-shape-complex-khmer-machine.rl" #line 59 "hb-ot-shape-complex-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (consonant_syllable); }} {{p = ((te))-1;}{ found_syllable (consonant_syllable); }}
break; break;
case 5: case 5:
#line 75 "hb-ot-shape-complex-khmer-machine.rl" #line 60 "hb-ot-shape-complex-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (broken_cluster); }} {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
break; break;
case 3: case 3:
@ -329,13 +329,13 @@ _eof_trans:
case 4: case 4:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 75 "hb-ot-shape-complex-khmer-machine.rl" #line 60 "hb-ot-shape-complex-khmer-machine.rl"
{act = 2;} {act = 2;}
break; break;
case 9: case 9:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 76 "hb-ot-shape-complex-khmer-machine.rl" #line 61 "hb-ot-shape-complex-khmer-machine.rl"
{act = 3;} {act = 3;}
break; break;
#line 342 "hb-ot-shape-complex-khmer-machine.hh" #line 342 "hb-ot-shape-complex-khmer-machine.hh"
@ -363,7 +363,7 @@ _again:
} }
#line 108 "hb-ot-shape-complex-khmer-machine.rl" #line 93 "hb-ot-shape-complex-khmer-machine.rl"
} }

View File

@ -37,22 +37,7 @@
%%{ %%{
# Same order as enum khmer_category_t. Not sure how to avoid duplication. import "hb-ot-shape-complex-indic-category.hh";
C = 1;
V = 2;
ZWNJ = 5;
ZWJ = 6;
PLACEHOLDER = 11;
DOTTEDCIRCLE = 12;
Coeng= 14;
Ra = 16;
Robatic = 20;
Xgroup = 21;
Ygroup = 22;
VAbv = 26;
VBlw = 27;
VPre = 28;
VPst = 29;
c = (C | Ra | V); c = (C | Ra | V);
cn = c.((ZWJ|ZWNJ)?.Robatic)?; cn = c.((ZWJ|ZWNJ)?.Robatic)?;

View File

@ -33,28 +33,15 @@
/* buffer var allocations */ /* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */ #define khmer_category() indic_category() /* indic_category_t */
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
enum khmer_category_t
{
OT_Robatic = 20,
OT_Xgroup = 21,
OT_Ygroup = 22,
//OT_VAbv = 26,
//OT_VBlw = 27,
//OT_VPre = 28,
//OT_VPst = 29,
};
static inline void static inline void
set_khmer_properties (hb_glyph_info_t &info) set_khmer_properties (hb_glyph_info_t &info)
{ {
hb_codepoint_t u = info.codepoint; hb_codepoint_t u = info.codepoint;
unsigned int type = hb_indic_get_categories (u); unsigned int type = hb_indic_get_categories (u);
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); indic_category_t cat = (indic_category_t) (type & 0x7Fu);
indic_position_t pos = (indic_position_t) (type >> 8); indic_position_t pos = (indic_position_t) (type >> 8);
@ -66,7 +53,7 @@ set_khmer_properties (hb_glyph_info_t &info)
switch (u) switch (u)
{ {
case 0x179Au: case 0x179Au:
cat = (khmer_category_t) OT_Ra; cat = (indic_category_t) OT_Ra;
break; break;
case 0x17CCu: case 0x17CCu:
@ -96,13 +83,13 @@ set_khmer_properties (hb_glyph_info_t &info)
/* /*
* Re-assign position. * Re-assign position.
*/ */
if (cat == (khmer_category_t) OT_M) if (cat == (indic_category_t) OT_M)
switch ((int) pos) switch ((int) pos)
{ {
case POS_PRE_C: cat = (khmer_category_t) OT_VPre; break; case POS_PRE_C: cat = (indic_category_t) OT_VPre; break;
case POS_BELOW_C: cat = (khmer_category_t) OT_VBlw; break; case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break;
case POS_ABOVE_C: cat = (khmer_category_t) OT_VAbv; break; case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break;
case POS_POST_C: cat = (khmer_category_t) OT_VPst; break; case POS_POST_C: cat = (indic_category_t) OT_VPst; break;
default: assert (0); default: assert (0);
} }

View File

@ -297,7 +297,7 @@ static const int myanmar_syllable_machine_en_main = 0;
#line 94 "hb-ot-shape-complex-myanmar-machine.rl" #line 69 "hb-ot-shape-complex-myanmar-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -324,7 +324,7 @@ find_syllables_myanmar (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 114 "hb-ot-shape-complex-myanmar-machine.rl" #line 89 "hb-ot-shape-complex-myanmar-machine.rl"
p = 0; p = 0;
@ -365,35 +365,35 @@ _eof_trans:
switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { switch ( _myanmar_syllable_machine_trans_actions[_trans] ) {
case 6: case 6:
#line 86 "hb-ot-shape-complex-myanmar-machine.rl" #line 61 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p+1;{ found_syllable (consonant_syllable); }} {te = p+1;{ found_syllable (consonant_syllable); }}
break; break;
case 4: case 4:
#line 87 "hb-ot-shape-complex-myanmar-machine.rl" #line 62 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p+1;{ found_syllable (non_myanmar_cluster); }} {te = p+1;{ found_syllable (non_myanmar_cluster); }}
break; break;
case 10: case 10:
#line 88 "hb-ot-shape-complex-myanmar-machine.rl" #line 63 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p+1;{ found_syllable (punctuation_cluster); }} {te = p+1;{ found_syllable (punctuation_cluster); }}
break; break;
case 8: case 8:
#line 89 "hb-ot-shape-complex-myanmar-machine.rl" #line 64 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p+1;{ found_syllable (broken_cluster); }} {te = p+1;{ found_syllable (broken_cluster); }}
break; break;
case 3: case 3:
#line 90 "hb-ot-shape-complex-myanmar-machine.rl" #line 65 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p+1;{ found_syllable (non_myanmar_cluster); }} {te = p+1;{ found_syllable (non_myanmar_cluster); }}
break; break;
case 5: case 5:
#line 86 "hb-ot-shape-complex-myanmar-machine.rl" #line 61 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p;p--;{ found_syllable (consonant_syllable); }} {te = p;p--;{ found_syllable (consonant_syllable); }}
break; break;
case 7: case 7:
#line 89 "hb-ot-shape-complex-myanmar-machine.rl" #line 64 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }} {te = p;p--;{ found_syllable (broken_cluster); }}
break; break;
case 9: case 9:
#line 90 "hb-ot-shape-complex-myanmar-machine.rl" #line 65 "hb-ot-shape-complex-myanmar-machine.rl"
{te = p;p--;{ found_syllable (non_myanmar_cluster); }} {te = p;p--;{ found_syllable (non_myanmar_cluster); }}
break; break;
#line 400 "hb-ot-shape-complex-myanmar-machine.hh" #line 400 "hb-ot-shape-complex-myanmar-machine.hh"
@ -421,7 +421,7 @@ _again:
} }
#line 122 "hb-ot-shape-complex-myanmar-machine.rl" #line 97 "hb-ot-shape-complex-myanmar-machine.rl"
} }

View File

@ -37,32 +37,7 @@
%%{ %%{
# Same order as enum myanmar_category_t. Not sure how to avoid duplication. import "hb-ot-shape-complex-indic-category.hh";
A = 10;
As = 18;
C = 1;
D = 32;
D0 = 20;
DB = 3;
GB = 11;
H = 4;
IV = 2;
MH = 21;
MR = 22;
MW = 23;
MY = 24;
PT = 25;
V = 8;
VAbv = 26;
VBlw = 27;
VPre = 28;
VPst = 29;
VS = 30;
ZWJ = 6;
ZWNJ = 5;
Ra = 16;
P = 31;
CS = 19;
j = ZWJ|ZWNJ; # Joiners j = ZWJ|ZWNJ; # Joiners
k = (Ra As H); # Kinzi k = (Ra As H); # Kinzi
@ -74,11 +49,11 @@ main_vowel_group = (VPre.VS?)* VAbv* VBlw* A* (DB As?)?;
post_vowel_group = VPst MH? As* VAbv* A* (DB As?)?; post_vowel_group = VPst MH? As* VAbv* A* (DB As?)?;
pwo_tone_group = PT A* DB? As?; pwo_tone_group = PT A* DB? As?;
complex_syllable_tail = As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* V* j?; complex_syllable_tail = As* medial_group main_vowel_group post_vowel_group* pwo_tone_group* VST* j?;
syllable_tail = (H (c|IV).VS?)* (H | complex_syllable_tail); syllable_tail = (H (c|IV).VS?)* (H | complex_syllable_tail);
consonant_syllable = (k|CS)? (c|IV|D|GB).VS? syllable_tail; consonant_syllable = (k|CS)? (c|IV|D|GB).VS? syllable_tail;
punctuation_cluster = P V; punctuation_cluster = P VST;
broken_cluster = k? VS? syllable_tail; broken_cluster = k? VS? syllable_tail;
other = any; other = any;

View File

@ -33,32 +33,10 @@
/* buffer var allocations */ /* buffer var allocations */
#define myanmar_category() indic_category() /* myanmar_category_t */ #define myanmar_category() indic_category() /* indic_category_t */
#define myanmar_position() indic_position() /* myanmar_position_t */ #define myanmar_position() indic_position() /* myanmar_position_t */
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
enum myanmar_category_t {
OT_As = 18, /* Asat */
OT_D0 = 20, /* Digit zero */
OT_DB = OT_N, /* Dot below */
OT_GB = OT_PLACEHOLDER,
OT_MH = 21, /* Various consonant medial types */
OT_MR = 22, /* Various consonant medial types */
OT_MW = 23, /* Various consonant medial types */
OT_MY = 24, /* Various consonant medial types */
OT_PT = 25, /* Pwo and other tones */
//OT_VAbv = 26,
//OT_VBlw = 27,
//OT_VPre = 28,
//OT_VPst = 29,
OT_VS = 30, /* Variation selectors */
OT_P = 31, /* Punctuation */
OT_D = 32, /* Digits except zero */
};
static inline void static inline void
set_myanmar_properties (hb_glyph_info_t &info) set_myanmar_properties (hb_glyph_info_t &info)
{ {
@ -155,11 +133,11 @@ set_myanmar_properties (hb_glyph_info_t &info)
{ {
switch ((int) pos) switch ((int) pos)
{ {
case POS_PRE_C: cat = (myanmar_category_t) OT_VPre; case POS_PRE_C: cat = (indic_category_t) OT_VPre;
pos = POS_PRE_M; break; pos = POS_PRE_M; break;
case POS_ABOVE_C: cat = (myanmar_category_t) OT_VAbv; break; case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break;
case POS_BELOW_C: cat = (myanmar_category_t) OT_VBlw; break; case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break;
case POS_POST_C: cat = (myanmar_category_t) OT_VPst; break; case POS_POST_C: cat = (indic_category_t) OT_VPst; break;
} }
} }

View File

@ -0,0 +1,87 @@
/*
* Copyright © 2015 Mozilla Foundation.
* Copyright © 2015 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Mozilla Author(s): Jonathan Kew
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_USE_CATEGORY_HH
#define HB_OT_SHAPE_COMPLEX_USE_CATEGORY_HH
/* Categories used in the Universal Shaping Engine spec:
* https://docs.microsoft.com/en-us/typography/script-development/use
*
* This header is the single definition of the USE category numbers. It is
* #included from C++ and also pulled into the USE Ragel machine with
* `import "hb-ot-shape-complex-use-category.hh";`.
*
* Every enumerator is therefore spelled DEFINE_USE (NAME = number):
* - the C++ preprocessor pastes the USE_ prefix onto NAME, yielding
*   USE_NAME = number;
* - the unprefixed `NAME = number` text remains inside the parentheses,
*   which is the form the Ragel grammar refers to (H, HVM, Sk, ...).
* Ragel's import statement scrapes `name = number` token sequences, so
* values must stay plain integer literals.
*
* The values are neither contiguous nor listed in ascending order; each
* entry carries its number explicitly, so list order does not affect
* the numbering.
*/
/* Prefix the first token of the argument with USE_; the rest ("= number")
* passes through unchanged. */
#define DEFINE_USE(category) USE_##category
enum use_category_t {
DEFINE_USE (O = 0), /* OTHER */
DEFINE_USE (B = 1), /* BASE */
DEFINE_USE (N = 4), /* BASE_NUM */
DEFINE_USE (GB = 5), /* BASE_OTHER */
DEFINE_USE (SUB = 11), /* CONS_SUB */
DEFINE_USE (H = 12), /* HALANT */
DEFINE_USE (HN = 13), /* HALANT_NUM */
DEFINE_USE (ZWNJ = 14), /* Zero width non-joiner */
DEFINE_USE (R = 18), /* REPHA */
DEFINE_USE (S = 19), /* SYM */
DEFINE_USE (CS = 43), /* CONS_WITH_STACKER */
/* https://github.com/harfbuzz/harfbuzz/issues/1102 */
DEFINE_USE (HVM = 44), /* HALANT_OR_VOWEL_MODIFIER */
DEFINE_USE (Sk = 48), /* SAKOT */
DEFINE_USE (G = 49), /* HIEROGLYPH */
DEFINE_USE (J = 50), /* HIEROGLYPH_JOINER */
DEFINE_USE (SB = 51), /* HIEROGLYPH_SEGMENT_BEGIN */
DEFINE_USE (SE = 52), /* HIEROGLYPH_SEGMENT_END */
DEFINE_USE (FAbv = 24), /* CONS_FINAL_ABOVE */
DEFINE_USE (FBlw = 25), /* CONS_FINAL_BELOW */
DEFINE_USE (FPst = 26), /* CONS_FINAL_POST */
DEFINE_USE (MAbv = 27), /* CONS_MED_ABOVE */
DEFINE_USE (MBlw = 28), /* CONS_MED_BELOW */
DEFINE_USE (MPst = 29), /* CONS_MED_POST */
DEFINE_USE (MPre = 30), /* CONS_MED_PRE */
DEFINE_USE (CMAbv = 31), /* CONS_MOD_ABOVE */
DEFINE_USE (CMBlw = 32), /* CONS_MOD_BELOW */
DEFINE_USE (VAbv = 33), /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
DEFINE_USE (VBlw = 34), /* VOWEL_BELOW / VOWEL_BELOW_POST */
DEFINE_USE (VPst = 35), /* VOWEL_POST UIPC = Right */
DEFINE_USE (VPre = 22), /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
DEFINE_USE (VMAbv = 37), /* VOWEL_MOD_ABOVE */
DEFINE_USE (VMBlw = 38), /* VOWEL_MOD_BELOW */
DEFINE_USE (VMPst = 39), /* VOWEL_MOD_POST */
DEFINE_USE (VMPre = 23), /* VOWEL_MOD_PRE */
DEFINE_USE (SMAbv = 41), /* SYM_MOD_ABOVE */
DEFINE_USE (SMBlw = 42), /* SYM_MOD_BELOW */
DEFINE_USE (FMAbv = 45), /* CONS_FINAL_MOD UIPC = Top */
DEFINE_USE (FMBlw = 46), /* CONS_FINAL_MOD UIPC = Bottom */
DEFINE_USE (FMPst = 47), /* CONS_FINAL_MOD UIPC = Not_Applicable */
};
/* The helper is only needed to build the enum above; do not leak it. */
#undef DEFINE_USE
#endif /* HB_OT_SHAPE_COMPLEX_USE_CATEGORY_HH */

View File

@ -308,7 +308,7 @@ static const int use_syllable_machine_en_main = 2;
#line 154 "hb-ot-shape-complex-use-machine.rl" #line 111 "hb-ot-shape-complex-use-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -359,7 +359,7 @@ find_syllables_use (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 198 "hb-ot-shape-complex-use-machine.rl" #line 155 "hb-ot-shape-complex-use-machine.rl"
unsigned int syllable_serial = 1; unsigned int syllable_serial = 1;
@ -401,59 +401,59 @@ _eof_trans:
{te = p+1;} {te = p+1;}
break; break;
case 5: case 5:
#line 141 "hb-ot-shape-complex-use-machine.rl" #line 98 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (independent_cluster); }} {te = p+1;{ found_syllable (independent_cluster); }}
break; break;
case 9: case 9:
#line 144 "hb-ot-shape-complex-use-machine.rl" #line 101 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (standard_cluster); }} {te = p+1;{ found_syllable (standard_cluster); }}
break; break;
case 7: case 7:
#line 149 "hb-ot-shape-complex-use-machine.rl" #line 106 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (broken_cluster); }} {te = p+1;{ found_syllable (broken_cluster); }}
break; break;
case 6: case 6:
#line 150 "hb-ot-shape-complex-use-machine.rl" #line 107 "hb-ot-shape-complex-use-machine.rl"
{te = p+1;{ found_syllable (non_cluster); }} {te = p+1;{ found_syllable (non_cluster); }}
break; break;
case 10: case 10:
#line 142 "hb-ot-shape-complex-use-machine.rl" #line 99 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (virama_terminated_cluster); }} {te = p;p--;{ found_syllable (virama_terminated_cluster); }}
break; break;
case 11: case 11:
#line 143 "hb-ot-shape-complex-use-machine.rl" #line 100 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (sakot_terminated_cluster); }} {te = p;p--;{ found_syllable (sakot_terminated_cluster); }}
break; break;
case 8: case 8:
#line 144 "hb-ot-shape-complex-use-machine.rl" #line 101 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (standard_cluster); }} {te = p;p--;{ found_syllable (standard_cluster); }}
break; break;
case 13: case 13:
#line 145 "hb-ot-shape-complex-use-machine.rl" #line 102 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }} {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }}
break; break;
case 12: case 12:
#line 146 "hb-ot-shape-complex-use-machine.rl" #line 103 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (numeral_cluster); }} {te = p;p--;{ found_syllable (numeral_cluster); }}
break; break;
case 14: case 14:
#line 147 "hb-ot-shape-complex-use-machine.rl" #line 104 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (symbol_cluster); }} {te = p;p--;{ found_syllable (symbol_cluster); }}
break; break;
case 17: case 17:
#line 148 "hb-ot-shape-complex-use-machine.rl" #line 105 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (hieroglyph_cluster); }} {te = p;p--;{ found_syllable (hieroglyph_cluster); }}
break; break;
case 15: case 15:
#line 149 "hb-ot-shape-complex-use-machine.rl" #line 106 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (broken_cluster); }} {te = p;p--;{ found_syllable (broken_cluster); }}
break; break;
case 16: case 16:
#line 150 "hb-ot-shape-complex-use-machine.rl" #line 107 "hb-ot-shape-complex-use-machine.rl"
{te = p;p--;{ found_syllable (non_cluster); }} {te = p;p--;{ found_syllable (non_cluster); }}
break; break;
case 1: case 1:
#line 149 "hb-ot-shape-complex-use-machine.rl" #line 106 "hb-ot-shape-complex-use-machine.rl"
{{p = ((te))-1;}{ found_syllable (broken_cluster); }} {{p = ((te))-1;}{ found_syllable (broken_cluster); }}
break; break;
#line 460 "hb-ot-shape-complex-use-machine.hh" #line 460 "hb-ot-shape-complex-use-machine.hh"
@ -481,7 +481,7 @@ _again:
} }
#line 203 "hb-ot-shape-complex-use-machine.rl" #line 160 "hb-ot-shape-complex-use-machine.rl"
} }

View File

@ -40,50 +40,7 @@
%%{ %%{
# Same order as enum use_category_t. Not sure how to avoid duplication. import "hb-ot-shape-complex-use-category.hh";
O = 0; # OTHER
B = 1; # BASE
N = 4; # BASE_NUM
GB = 5; # BASE_OTHER
SUB = 11; # CONS_SUB
H = 12; # HALANT
HN = 13; # HALANT_NUM
ZWNJ = 14; # Zero width non-joiner
R = 18; # REPHA
S = 19; # SYM
CS = 43; # CONS_WITH_STACKER
HVM = 44; # HALANT_OR_VOWEL_MODIFIER
Sk = 48; # SAKOT
G = 49; # HIEROGLYPH
J = 50; # HIEROGLYPH_JOINER
SB = 51; # HIEROGLYPH_SEGMENT_BEGIN
SE = 52; # HIEROGLYPH_SEGMENT_END
FAbv = 24; # CONS_FINAL_ABOVE
FBlw = 25; # CONS_FINAL_BELOW
FPst = 26; # CONS_FINAL_POST
MAbv = 27; # CONS_MED_ABOVE
MBlw = 28; # CONS_MED_BELOW
MPst = 29; # CONS_MED_POST
MPre = 30; # CONS_MED_PRE
CMAbv = 31; # CONS_MOD_ABOVE
CMBlw = 32; # CONS_MOD_BELOW
VAbv = 33; # VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST
VBlw = 34; # VOWEL_BELOW / VOWEL_BELOW_POST
VPst = 35; # VOWEL_POST UIPC = Right
VPre = 22; # VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST
VMAbv = 37; # VOWEL_MOD_ABOVE
VMBlw = 38; # VOWEL_MOD_BELOW
VMPst = 39; # VOWEL_MOD_POST
VMPre = 23; # VOWEL_MOD_PRE
SMAbv = 41; # SYM_MOD_ABOVE
SMBlw = 42; # SYM_MOD_BELOW
FMAbv = 45; # CONS_FINAL_MOD UIPC = Top
FMBlw = 46; # CONS_FINAL_MOD UIPC = Bottom
FMPst = 47; # CONS_FINAL_MOD UIPC = Not_Applicable
h = H | HVM | Sk; h = H | HVM | Sk;

View File

@ -33,62 +33,11 @@
#include "hb-ot-shape-complex.hh" #include "hb-ot-shape-complex.hh"
#include "hb-ot-shape-complex-use-category.hh"
#define USE_TABLE_ELEMENT_TYPE uint8_t #define USE_TABLE_ELEMENT_TYPE uint8_t
/* Cateories used in the Universal Shaping Engine spec:
* https://docs.microsoft.com/en-us/typography/script-development/use
*/
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
enum use_category_t {
USE_O = 0, /* OTHER */
USE_B = 1, /* BASE */
USE_N = 4, /* BASE_NUM */
USE_GB = 5, /* BASE_OTHER */
USE_SUB = 11, /* CONS_SUB */
USE_H = 12, /* HALANT */
USE_HN = 13, /* HALANT_NUM */
USE_ZWNJ = 14, /* Zero width non-joiner */
USE_R = 18, /* REPHA */
USE_S = 19, /* SYM */
USE_CS = 43, /* CONS_WITH_STACKER */
/* https://github.com/harfbuzz/harfbuzz/issues/1102 */
USE_HVM = 44, /* HALANT_OR_VOWEL_MODIFIER */
USE_Sk = 48, /* SAKOT */
USE_G = 49, /* HIEROGLYPH */
USE_J = 50, /* HIEROGLYPH_JOINER */
USE_SB = 51, /* HIEROGLYPH_SEGMENT_BEGIN */
USE_SE = 52, /* HIEROGLYPH_SEGMENT_END */
USE_FAbv = 24, /* CONS_FINAL_ABOVE */
USE_FBlw = 25, /* CONS_FINAL_BELOW */
USE_FPst = 26, /* CONS_FINAL_POST */
USE_MAbv = 27, /* CONS_MED_ABOVE */
USE_MBlw = 28, /* CONS_MED_BELOW */
USE_MPst = 29, /* CONS_MED_POST */
USE_MPre = 30, /* CONS_MED_PRE */
USE_CMAbv = 31, /* CONS_MOD_ABOVE */
USE_CMBlw = 32, /* CONS_MOD_BELOW */
USE_VAbv = 33, /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */
USE_VBlw = 34, /* VOWEL_BELOW / VOWEL_BELOW_POST */
USE_VPst = 35, /* VOWEL_POST UIPC = Right */
USE_VPre = 22, /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */
USE_VMAbv = 37, /* VOWEL_MOD_ABOVE */
USE_VMBlw = 38, /* VOWEL_MOD_BELOW */
USE_VMPst = 39, /* VOWEL_MOD_POST */
USE_VMPre = 23, /* VOWEL_MOD_PRE */
USE_SMAbv = 41, /* SYM_MOD_ABOVE */
USE_SMBlw = 42, /* SYM_MOD_BELOW */
USE_FMAbv = 45, /* CONS_FINAL_MOD UIPC = Top */
USE_FMBlw = 46, /* CONS_FINAL_MOD UIPC = Bottom */
USE_FMPst = 47, /* CONS_FINAL_MOD UIPC = Not_Applicable */
};
HB_INTERNAL USE_TABLE_ELEMENT_TYPE HB_INTERNAL USE_TABLE_ELEMENT_TYPE
hb_use_get_category (hb_codepoint_t u); hb_use_get_category (hb_codepoint_t u);

View File

@ -119,6 +119,7 @@ hb_base_sources = files(
'hb-ot-shape-complex-default.cc', 'hb-ot-shape-complex-default.cc',
'hb-ot-shape-complex-hangul.cc', 'hb-ot-shape-complex-hangul.cc',
'hb-ot-shape-complex-hebrew.cc', 'hb-ot-shape-complex-hebrew.cc',
'hb-ot-shape-complex-indic-category.hh',
'hb-ot-shape-complex-indic-table.cc', 'hb-ot-shape-complex-indic-table.cc',
'hb-ot-shape-complex-indic.cc', 'hb-ot-shape-complex-indic.cc',
'hb-ot-shape-complex-indic.hh', 'hb-ot-shape-complex-indic.hh',
@ -127,6 +128,7 @@ hb_base_sources = files(
'hb-ot-shape-complex-myanmar.cc', 'hb-ot-shape-complex-myanmar.cc',
'hb-ot-shape-complex-myanmar.hh', 'hb-ot-shape-complex-myanmar.hh',
'hb-ot-shape-complex-thai.cc', 'hb-ot-shape-complex-thai.cc',
'hb-ot-shape-complex-use-category.hh',
'hb-ot-shape-complex-use-table.cc', 'hb-ot-shape-complex-use-table.cc',
'hb-ot-shape-complex-use.cc', 'hb-ot-shape-complex-use.cc',
'hb-ot-shape-complex-use.hh', 'hb-ot-shape-complex-use.hh',