[khmer] Split off Khmer shaper from Indic
Towards fixing https://github.com/harfbuzz/harfbuzz/issues/667 The Khmer spec is different enough from other Indic ones to require its own grammar. No change in functionality. Test numbers are: BENGALI: 353725 out of 354188 tests passed. 463 failed (0.130722%) DEVANAGARI: 707307 out of 707394 tests passed. 87 failed (0.0122987%) GUJARATI: 366355 out of 366457 tests passed. 102 failed (0.0278341%) GURMUKHI: 60729 out of 60747 tests passed. 18 failed (0.0296311%) KANNADA: 951300 out of 951913 tests passed. 613 failed (0.0643966%) KHMER: 299071 out of 299124 tests passed. 53 failed (0.0177184%) MALAYALAM: 1048136 out of 1048334 tests passed. 198 failed (0.0188871%) ORIYA: 42320 out of 42329 tests passed. 9 failed (0.021262%) SINHALA: 271662 out of 271847 tests passed. 185 failed (0.068053%) TAMIL: 1091754 out of 1091754 tests passed. 0 failed (0%) TELUGU: 970555 out of 970573 tests passed. 18 failed (0.00185457%)
This commit is contained in:
parent
1e05ea79d0
commit
dcf4d95fea
|
@ -107,6 +107,7 @@ HB_OT_sources = \
|
|||
hb-ot-shape-complex-indic.cc \
|
||||
hb-ot-shape-complex-indic-private.hh \
|
||||
hb-ot-shape-complex-indic-table.cc \
|
||||
hb-ot-shape-complex-khmer.cc \
|
||||
hb-ot-shape-complex-myanmar.cc \
|
||||
hb-ot-shape-complex-thai.cc \
|
||||
hb-ot-shape-complex-tibetan.cc \
|
||||
|
@ -128,11 +129,13 @@ HB_OT_sources = \
|
|||
|
||||
HB_OT_RAGEL_GENERATED_sources = \
|
||||
hb-ot-shape-complex-indic-machine.hh \
|
||||
hb-ot-shape-complex-khmer-machine.hh \
|
||||
hb-ot-shape-complex-myanmar-machine.hh \
|
||||
hb-ot-shape-complex-use-machine.hh \
|
||||
$(NULL)
|
||||
HB_OT_RAGEL_sources = \
|
||||
hb-ot-shape-complex-indic-machine.rl \
|
||||
hb-ot-shape-complex-khmer-machine.rl \
|
||||
hb-ot-shape-complex-myanmar-machine.rl \
|
||||
hb-ot-shape-complex-use-machine.rl \
|
||||
$(NULL)
|
||||
|
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright © 2011,2012 Google, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*
|
||||
* Google Author(s): Behdad Esfahbod
|
||||
*/
|
||||
|
||||
#ifndef HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
|
||||
#define HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH
|
||||
|
||||
#include "hb-private.hh"
|
||||
|
||||
%%{
|
||||
machine khmer_syllable_machine;
|
||||
alphtype unsigned char;
|
||||
write data;
|
||||
}%%
|
||||
|
||||
%%{
|
||||
|
||||
# Same order as enum khmer_category_t. Not sure how to avoid duplication.
|
||||
X = 0;
|
||||
C = 1;
|
||||
V = 2;
|
||||
N = 3;
|
||||
H = 4;
|
||||
ZWNJ = 5;
|
||||
ZWJ = 6;
|
||||
M = 7;
|
||||
SM = 8;
|
||||
VD = 9;
|
||||
A = 10;
|
||||
PLACEHOLDER = 11;
|
||||
DOTTEDCIRCLE = 12;
|
||||
RS = 13;
|
||||
Coeng = 14;
|
||||
Repha = 15;
|
||||
Ra = 16;
|
||||
CM = 17;
|
||||
Symbol= 18;
|
||||
CS = 19;
|
||||
|
||||
c = (C | Ra); # is_consonant
|
||||
n = ((ZWNJ?.RS)? (N.N?)?); # is_consonant_modifier
|
||||
z = ZWJ|ZWNJ; # is_joiner
|
||||
h = H | Coeng; # is_halant_or_coeng
|
||||
reph = (Ra H | Repha); # possible reph
|
||||
|
||||
cn = c.ZWJ?.n?;
|
||||
forced_rakar = ZWJ H ZWJ Ra;
|
||||
symbol = Symbol.N?;
|
||||
matra_group = z{0,3}.M.N?.(H | forced_rakar)?;
|
||||
syllable_tail = (z?.SM.SM?.ZWNJ?)? A{0,3}? VD{0,2};
|
||||
place_holder = PLACEHOLDER | DOTTEDCIRCLE;
|
||||
halant_group = (z?.h.(ZWJ.N?)?);
|
||||
final_halant_group = halant_group | h.ZWNJ;
|
||||
medial_group = CM?;
|
||||
halant_or_matra_group = (final_halant_group | (h.ZWJ)? matra_group{0,4}) (Coeng (cn|V))?;
|
||||
|
||||
|
||||
consonant_syllable = (Repha|CS)? (cn.halant_group){0,4} cn medial_group halant_or_matra_group syllable_tail;
|
||||
vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail);
|
||||
standalone_cluster = ((Repha|CS)? PLACEHOLDER | reph? DOTTEDCIRCLE).n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
|
||||
symbol_cluster = symbol syllable_tail;
|
||||
broken_cluster = reph? n? (halant_group.cn){0,4} medial_group halant_or_matra_group syllable_tail;
|
||||
other = any;
|
||||
|
||||
main := |*
|
||||
consonant_syllable => { found_syllable (consonant_syllable); };
|
||||
vowel_syllable => { found_syllable (vowel_syllable); };
|
||||
standalone_cluster => { found_syllable (standalone_cluster); };
|
||||
symbol_cluster => { found_syllable (symbol_cluster); };
|
||||
broken_cluster => { found_syllable (broken_cluster); };
|
||||
other => { found_syllable (non_khmer_cluster); };
|
||||
*|;
|
||||
|
||||
|
||||
}%%
|
||||
|
||||
#define found_syllable(syllable_type) \
|
||||
HB_STMT_START { \
|
||||
if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \
|
||||
for (unsigned int i = last; i < p+1; i++) \
|
||||
info[i].syllable() = (syllable_serial << 4) | syllable_type; \
|
||||
last = p+1; \
|
||||
syllable_serial++; \
|
||||
if (unlikely (syllable_serial == 16)) syllable_serial = 1; \
|
||||
} HB_STMT_END
|
||||
|
||||
static void
|
||||
find_syllables (hb_buffer_t *buffer)
|
||||
{
|
||||
unsigned int p, pe, eof, ts HB_UNUSED, te HB_UNUSED, act HB_UNUSED;
|
||||
int cs;
|
||||
hb_glyph_info_t *info = buffer->info;
|
||||
%%{
|
||||
write init;
|
||||
getkey info[p].khmer_category();
|
||||
}%%
|
||||
|
||||
p = 0;
|
||||
pe = eof = buffer->len;
|
||||
|
||||
unsigned int last = 0;
|
||||
unsigned int syllable_serial = 1;
|
||||
%%{
|
||||
write exec;
|
||||
}%%
|
||||
}
|
||||
|
||||
#endif /* HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH */
|
File diff suppressed because it is too large
Load Diff
|
@ -54,9 +54,10 @@ enum hb_ot_shape_zero_width_marks_type_t {
|
|||
HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (hebrew) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_old) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (indic) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (khmer) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_old) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (thai) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (tibetan) \
|
||||
HB_COMPLEX_SHAPER_IMPLEMENT (use) \
|
||||
|
@ -285,7 +286,7 @@ hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
|
|||
planner->map.language_index[0],
|
||||
HB_TAG ('p','r','e','f'),
|
||||
nullptr))
|
||||
return &_hb_ot_complex_shaper_indic;
|
||||
return &_hb_ot_complex_shaper_khmer;
|
||||
else
|
||||
return &_hb_ot_complex_shaper_default;
|
||||
|
||||
|
|
Loading…
Reference in New Issue