[khmer] Remove duplication of categories in the Khmer shaper

This commit is contained in:
Behdad Esfahbod 2022-06-05 01:34:10 -06:00
parent 17c80035ad
commit ce0528c0ff
6 changed files with 105 additions and 142 deletions

View File

@ -144,7 +144,6 @@ HB_BASE_sources = \
hb-ot-shaper-indic.cc \
hb-ot-shaper-indic.hh \
hb-ot-shaper-khmer.cc \
hb-ot-shaper-khmer.hh \
hb-ot-shaper-myanmar.cc \
hb-ot-shaper-myanmar.hh \
hb-ot-shaper-syllabic.cc \

View File

@ -31,6 +31,16 @@
#include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */
using khmer_category_t = unsigned;
#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat
enum khmer_syllable_type_t {
khmer_consonant_syllable,
khmer_broken_cluster,
@ -38,7 +48,7 @@ enum khmer_syllable_type_t {
};
#line 42 "hb-ot-shaper-khmer-machine.hh"
#line 52 "hb-ot-shaper-khmer-machine.hh"
#define khmer_syllable_machine_ex_C 1u
#define khmer_syllable_machine_ex_Coeng 13u
#define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u
@ -56,7 +66,7 @@ enum khmer_syllable_type_t {
#define khmer_syllable_machine_ex_ZWNJ 5u
#line 60 "hb-ot-shaper-khmer-machine.hh"
#line 70 "hb-ot-shaper-khmer-machine.hh"
static const unsigned char _khmer_syllable_machine_trans_keys[] = {
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, 5u, 21u,
5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u,
@ -239,11 +249,11 @@ static const int khmer_syllable_machine_error = -1;
static const int khmer_syllable_machine_en_main = 20;
#line 43 "hb-ot-shaper-khmer-machine.rl"
#line 53 "hb-ot-shaper-khmer-machine.rl"
#line 86 "hb-ot-shaper-khmer-machine.rl"
#line 96 "hb-ot-shaper-khmer-machine.rl"
#define found_syllable(syllable_type) \
@ -262,7 +272,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
int cs;
hb_glyph_info_t *info = buffer->info;
#line 266 "hb-ot-shaper-khmer-machine.hh"
#line 276 "hb-ot-shaper-khmer-machine.hh"
{
cs = khmer_syllable_machine_start;
ts = 0;
@ -270,7 +280,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
act = 0;
}
#line 106 "hb-ot-shaper-khmer-machine.rl"
#line 116 "hb-ot-shaper-khmer-machine.rl"
p = 0;
@ -278,7 +288,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
unsigned int syllable_serial = 1;
#line 282 "hb-ot-shaper-khmer-machine.hh"
#line 292 "hb-ot-shaper-khmer-machine.hh"
{
int _slen;
int _trans;
@ -292,7 +302,7 @@ _resume:
#line 1 "NONE"
{ts = p;}
break;
#line 296 "hb-ot-shaper-khmer-machine.hh"
#line 306 "hb-ot-shaper-khmer-machine.hh"
}
_keys = _khmer_syllable_machine_trans_keys + (cs<<1);
@ -315,27 +325,27 @@ _eof_trans:
{te = p+1;}
break;
case 8:
#line 82 "hb-ot-shaper-khmer-machine.rl"
#line 92 "hb-ot-shaper-khmer-machine.rl"
{te = p+1;{ found_syllable (khmer_non_khmer_cluster); }}
break;
case 10:
#line 80 "hb-ot-shaper-khmer-machine.rl"
#line 90 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_consonant_syllable); }}
break;
case 12:
#line 81 "hb-ot-shaper-khmer-machine.rl"
#line 91 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_broken_cluster); }}
break;
case 11:
#line 82 "hb-ot-shaper-khmer-machine.rl"
#line 92 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_non_khmer_cluster); }}
break;
case 1:
#line 80 "hb-ot-shaper-khmer-machine.rl"
#line 90 "hb-ot-shaper-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (khmer_consonant_syllable); }}
break;
case 5:
#line 81 "hb-ot-shaper-khmer-machine.rl"
#line 91 "hb-ot-shaper-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (khmer_broken_cluster); }}
break;
case 3:
@ -353,16 +363,16 @@ _eof_trans:
case 4:
#line 1 "NONE"
{te = p+1;}
#line 81 "hb-ot-shaper-khmer-machine.rl"
#line 91 "hb-ot-shaper-khmer-machine.rl"
{act = 2;}
break;
case 9:
#line 1 "NONE"
{te = p+1;}
#line 82 "hb-ot-shaper-khmer-machine.rl"
#line 92 "hb-ot-shaper-khmer-machine.rl"
{act = 3;}
break;
#line 366 "hb-ot-shaper-khmer-machine.hh"
#line 376 "hb-ot-shaper-khmer-machine.hh"
}
_again:
@ -371,7 +381,7 @@ _again:
#line 1 "NONE"
{ts = 0;}
break;
#line 375 "hb-ot-shaper-khmer-machine.hh"
#line 385 "hb-ot-shaper-khmer-machine.hh"
}
if ( ++p != pe )
@ -387,7 +397,7 @@ _again:
}
#line 114 "hb-ot-shaper-khmer-machine.rl"
#line 124 "hb-ot-shaper-khmer-machine.rl"
}

View File

@ -29,6 +29,16 @@
#include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */
using khmer_category_t = unsigned;
#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat
enum khmer_syllable_type_t {
khmer_consonant_syllable,
khmer_broken_cluster,

View File

@ -28,8 +28,8 @@
#ifndef HB_NO_OT_SHAPE
#include "hb-ot-shaper-khmer.hh"
#include "hb-ot-shaper-khmer-machine.hh"
#include "hb-ot-shaper-indic.hh"
#include "hb-ot-layout.hh"
@ -79,6 +79,66 @@ enum {
KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
};
/*
 * Compute the Khmer shaping category of one glyph and store it in the
 * glyph's khmer_category() buffer variable.
 *
 * hb_indic_get_categories() returns a packed value: the low 8 bits hold the
 * category and the remaining bits hold the position.  A handful of
 * codepoints then get their category overridden, and generic matras (OT_M)
 * are split into one of four vowel categories according to their position.
 */
static inline void
set_khmer_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t cp = info.codepoint;
  const unsigned int packed = hb_indic_get_categories (cp);

  khmer_category_t category = (khmer_category_t) (packed & 0xFFu);
  const indic_position_t position = (indic_position_t) (packed >> 8);

  /*
   * Per-codepoint category overrides.
   *
   * These categories are experimentally extracted from what Uniscribe allows.
   */
  switch (cp)
  {
    case 0x179Au:
      category = K_Cat(Ra);
      break;

    case 0x17C9u:
    case 0x17CAu:
    case 0x17CCu:
      category = K_Cat(Robatic);
      break;

    case 0x17C6u:
    case 0x17CBu:
    case 0x17CDu:
    case 0x17CEu:
    case 0x17CFu:
    case 0x17D0u:
    case 0x17D1u:
      category = K_Cat(Xgroup);
      break;

    case 0x17C7u:
    case 0x17C8u:
    case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
    case 0x17DDu:
      category = K_Cat(Ygroup);
      break;

    default:
      /* Every other codepoint keeps the category from the lookup table. */
      break;
  }

  /*
   * Generic Indic matras carry their placement in the position field;
   * translate that into the matching Khmer vowel category.
   */
  if (category == (khmer_category_t) OT_M /* Indic M */)
  {
    switch ((int) position)
    {
      case POS_PRE_C:
        category = K_Cat(VPre);
        break;
      case POS_BELOW_C:
        category = K_Cat(VBlw);
        break;
      case POS_ABOVE_C:
        category = K_Cat(VAbv);
        break;
      case POS_POST_C:
        category = K_Cat(VPst);
        break;
      default:
        /* A matra with any other position is not expected here. */
        assert (0);
    }
  }

  info.khmer_category() = category;
}
static void
setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
hb_font_t *font,
@ -231,11 +291,11 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
* the 'pref' OpenType feature applied to them.
* """
*/
if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end)
if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end)
{
num_coengs++;
if (info[i + 1].khmer_category() == OT_Ra)
if (info[i + 1].khmer_category() == K_Cat(Ra))
{
for (unsigned int j = 0; j < 2; j++)
info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
@ -263,7 +323,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
}
/* Reorder left matra piece. */
else if (info[i].khmer_category() == OT_VPre)
else if (info[i].khmer_category() == K_Cat(VPre))
{
/* Move to the start. */
buffer->merge_clusters (start, i + 1);
@ -302,8 +362,8 @@ reorder_khmer (const hb_ot_shape_plan_t *plan,
{
hb_syllabic_insert_dotted_circles (font, buffer,
khmer_broken_cluster,
OT_DOTTEDCIRCLE,
OT_Repha);
K_Cat(DOTTEDCIRCLE),
(unsigned) -1);
foreach_syllable (buffer, start, end)
reorder_syllable_khmer (plan, font->face, buffer, start, end);

View File

@ -1,115 +0,0 @@
/*
* Copyright © 2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPER_KHMER_HH
#define HB_OT_SHAPER_KHMER_HH
#include "hb.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */
/* Note: This enum is duplicated in the -machine.rl source file.
* Not sure how to avoid duplication. */
/* Category values declared specifically for the Khmer shaper. */
enum khmer_category_t
{
OT_Robatic = 20,
OT_Xgroup = 21,
OT_Ygroup = 22,
/* The four vowel categories below are left commented out here, yet
 * set_khmer_properties() in this header still uses these names, so they
 * are presumably supplied by hb-ot-shaper-indic.hh -- TODO confirm. */
//OT_VAbv = 26,
//OT_VBlw = 27,
//OT_VPre = 28,
//OT_VPst = 29,
};
using khmer_position_t = indic_position_t;
/*
 * Compute the Khmer shaping category of one glyph and store it in the
 * glyph's khmer_category() buffer variable.
 *
 * The value returned by hb_indic_get_categories() is unpacked into a
 * category (low 8 bits) and a position (remaining bits); selected
 * codepoints then get an overridden category, and generic matras (OT_M)
 * are mapped to a vowel category chosen by position.
 */
static inline void
set_khmer_properties (hb_glyph_info_t &info)
{
hb_codepoint_t u = info.codepoint;
/* Packed lookup result: category in the low byte, position above it. */
unsigned int type = hb_indic_get_categories (u);
khmer_category_t cat = (khmer_category_t) (type & 0xFFu);
khmer_position_t pos = (khmer_position_t) (type >> 8);
/*
* Re-assign category
*
* These categories are experimentally extracted from what Uniscribe allows.
*/
switch (u)
{
case 0x179Au:
/* OT_Ra is not declared in khmer_category_t, hence the explicit cast. */
cat = (khmer_category_t) OT_Ra;
break;
case 0x17CCu:
case 0x17C9u:
case 0x17CAu:
cat = OT_Robatic;
break;
case 0x17C6u:
case 0x17CBu:
case 0x17CDu:
case 0x17CEu:
case 0x17CFu:
case 0x17D0u:
case 0x17D1u:
cat = OT_Xgroup;
break;
case 0x17C7u:
case 0x17C8u:
case 0x17DDu:
case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
cat = OT_Ygroup;
break;
}
/*
* Re-assign position.
*/
/* Only glyphs still categorized as a generic matra (OT_M) are refined;
 * the position decides which vowel category replaces it. */
if (cat == (khmer_category_t) OT_M)
switch ((int) pos)
{
case POS_PRE_C: cat = (khmer_category_t) OT_VPre; break;
case POS_BELOW_C: cat = (khmer_category_t) OT_VBlw; break;
case POS_ABOVE_C: cat = (khmer_category_t) OT_VAbv; break;
case POS_POST_C: cat = (khmer_category_t) OT_VPst; break;
/* Any other position for a matra trips the assertion. */
default: assert (0);
}
/* khmer_category() is a macro alias for indic_category() (defined above). */
info.khmer_category() = cat;
}
#endif /* HB_OT_SHAPER_KHMER_HH */

View File

@ -148,7 +148,6 @@ hb_base_sources = files(
'hb-ot-shaper-indic.cc',
'hb-ot-shaper-indic.hh',
'hb-ot-shaper-khmer.cc',
'hb-ot-shaper-khmer.hh',
'hb-ot-shaper-myanmar.cc',
'hb-ot-shaper-myanmar.hh',
'hb-ot-shaper-syllabic.cc',