[khmer] Remove duplication of categories in the Khmer shaper

This commit is contained in:
Behdad Esfahbod 2022-06-05 01:34:10 -06:00
parent 17c80035ad
commit ce0528c0ff
6 changed files with 105 additions and 142 deletions

View File

@ -144,7 +144,6 @@ HB_BASE_sources = \
hb-ot-shaper-indic.cc \ hb-ot-shaper-indic.cc \
hb-ot-shaper-indic.hh \ hb-ot-shaper-indic.hh \
hb-ot-shaper-khmer.cc \ hb-ot-shaper-khmer.cc \
hb-ot-shaper-khmer.hh \
hb-ot-shaper-myanmar.cc \ hb-ot-shaper-myanmar.cc \
hb-ot-shaper-myanmar.hh \ hb-ot-shaper-myanmar.hh \
hb-ot-shaper-syllabic.cc \ hb-ot-shaper-syllabic.cc \

View File

@ -31,6 +31,16 @@
#include "hb.hh" #include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
/* The Khmer shaper has no accessor of its own: info.khmer_category()
 * expands to info.indic_category(). */
#define khmer_category() indic_category() /* khmer_category_t */
/* Khmer categories are plain unsigned values. */
using khmer_category_t = unsigned;
/* K_Cat(Name) pastes Name onto the khmer_syllable_machine_ex_ prefix, so
 * K_Cat(Coeng) becomes khmer_syllable_machine_ex_Coeng, one of the numeric
 * constants #defined further down in this header. */
#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat
enum khmer_syllable_type_t { enum khmer_syllable_type_t {
khmer_consonant_syllable, khmer_consonant_syllable,
khmer_broken_cluster, khmer_broken_cluster,
@ -38,7 +48,7 @@ enum khmer_syllable_type_t {
}; };
#line 42 "hb-ot-shaper-khmer-machine.hh" #line 52 "hb-ot-shaper-khmer-machine.hh"
#define khmer_syllable_machine_ex_C 1u #define khmer_syllable_machine_ex_C 1u
#define khmer_syllable_machine_ex_Coeng 13u #define khmer_syllable_machine_ex_Coeng 13u
#define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u #define khmer_syllable_machine_ex_DOTTEDCIRCLE 11u
@ -56,7 +66,7 @@ enum khmer_syllable_type_t {
#define khmer_syllable_machine_ex_ZWNJ 5u #define khmer_syllable_machine_ex_ZWNJ 5u
#line 60 "hb-ot-shaper-khmer-machine.hh" #line 70 "hb-ot-shaper-khmer-machine.hh"
static const unsigned char _khmer_syllable_machine_trans_keys[] = { static const unsigned char _khmer_syllable_machine_trans_keys[] = {
5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, 5u, 21u,
5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u, 5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 15u, 5u, 21u, 5u, 26u,
@ -239,11 +249,11 @@ static const int khmer_syllable_machine_error = -1;
static const int khmer_syllable_machine_en_main = 20; static const int khmer_syllable_machine_en_main = 20;
#line 43 "hb-ot-shaper-khmer-machine.rl" #line 53 "hb-ot-shaper-khmer-machine.rl"
#line 86 "hb-ot-shaper-khmer-machine.rl" #line 96 "hb-ot-shaper-khmer-machine.rl"
#define found_syllable(syllable_type) \ #define found_syllable(syllable_type) \
@ -262,7 +272,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
int cs; int cs;
hb_glyph_info_t *info = buffer->info; hb_glyph_info_t *info = buffer->info;
#line 266 "hb-ot-shaper-khmer-machine.hh" #line 276 "hb-ot-shaper-khmer-machine.hh"
{ {
cs = khmer_syllable_machine_start; cs = khmer_syllable_machine_start;
ts = 0; ts = 0;
@ -270,7 +280,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
act = 0; act = 0;
} }
#line 106 "hb-ot-shaper-khmer-machine.rl" #line 116 "hb-ot-shaper-khmer-machine.rl"
p = 0; p = 0;
@ -278,7 +288,7 @@ find_syllables_khmer (hb_buffer_t *buffer)
unsigned int syllable_serial = 1; unsigned int syllable_serial = 1;
#line 282 "hb-ot-shaper-khmer-machine.hh" #line 292 "hb-ot-shaper-khmer-machine.hh"
{ {
int _slen; int _slen;
int _trans; int _trans;
@ -292,7 +302,7 @@ _resume:
#line 1 "NONE" #line 1 "NONE"
{ts = p;} {ts = p;}
break; break;
#line 296 "hb-ot-shaper-khmer-machine.hh" #line 306 "hb-ot-shaper-khmer-machine.hh"
} }
_keys = _khmer_syllable_machine_trans_keys + (cs<<1); _keys = _khmer_syllable_machine_trans_keys + (cs<<1);
@ -315,27 +325,27 @@ _eof_trans:
{te = p+1;} {te = p+1;}
break; break;
case 8: case 8:
#line 82 "hb-ot-shaper-khmer-machine.rl" #line 92 "hb-ot-shaper-khmer-machine.rl"
{te = p+1;{ found_syllable (khmer_non_khmer_cluster); }} {te = p+1;{ found_syllable (khmer_non_khmer_cluster); }}
break; break;
case 10: case 10:
#line 80 "hb-ot-shaper-khmer-machine.rl" #line 90 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_consonant_syllable); }} {te = p;p--;{ found_syllable (khmer_consonant_syllable); }}
break; break;
case 12: case 12:
#line 81 "hb-ot-shaper-khmer-machine.rl" #line 91 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_broken_cluster); }} {te = p;p--;{ found_syllable (khmer_broken_cluster); }}
break; break;
case 11: case 11:
#line 82 "hb-ot-shaper-khmer-machine.rl" #line 92 "hb-ot-shaper-khmer-machine.rl"
{te = p;p--;{ found_syllable (khmer_non_khmer_cluster); }} {te = p;p--;{ found_syllable (khmer_non_khmer_cluster); }}
break; break;
case 1: case 1:
#line 80 "hb-ot-shaper-khmer-machine.rl" #line 90 "hb-ot-shaper-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (khmer_consonant_syllable); }} {{p = ((te))-1;}{ found_syllable (khmer_consonant_syllable); }}
break; break;
case 5: case 5:
#line 81 "hb-ot-shaper-khmer-machine.rl" #line 91 "hb-ot-shaper-khmer-machine.rl"
{{p = ((te))-1;}{ found_syllable (khmer_broken_cluster); }} {{p = ((te))-1;}{ found_syllable (khmer_broken_cluster); }}
break; break;
case 3: case 3:
@ -353,16 +363,16 @@ _eof_trans:
case 4: case 4:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 81 "hb-ot-shaper-khmer-machine.rl" #line 91 "hb-ot-shaper-khmer-machine.rl"
{act = 2;} {act = 2;}
break; break;
case 9: case 9:
#line 1 "NONE" #line 1 "NONE"
{te = p+1;} {te = p+1;}
#line 82 "hb-ot-shaper-khmer-machine.rl" #line 92 "hb-ot-shaper-khmer-machine.rl"
{act = 3;} {act = 3;}
break; break;
#line 366 "hb-ot-shaper-khmer-machine.hh" #line 376 "hb-ot-shaper-khmer-machine.hh"
} }
_again: _again:
@ -371,7 +381,7 @@ _again:
#line 1 "NONE" #line 1 "NONE"
{ts = 0;} {ts = 0;}
break; break;
#line 375 "hb-ot-shaper-khmer-machine.hh" #line 385 "hb-ot-shaper-khmer-machine.hh"
} }
if ( ++p != pe ) if ( ++p != pe )
@ -387,7 +397,7 @@ _again:
} }
#line 114 "hb-ot-shaper-khmer-machine.rl" #line 124 "hb-ot-shaper-khmer-machine.rl"
} }

View File

@ -29,6 +29,16 @@
#include "hb.hh" #include "hb.hh"
#include "hb-ot-layout.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
/* The Khmer shaper has no accessor of its own: info.khmer_category()
 * expands to info.indic_category(). */
#define khmer_category() indic_category() /* khmer_category_t */
/* Khmer categories are plain unsigned values. */
using khmer_category_t = unsigned;
/* K_Cat(Name) pastes Name onto the khmer_syllable_machine_ex_ prefix, e.g.
 * K_Cat(Coeng) becomes khmer_syllable_machine_ex_Coeng.
 * NOTE(review): those constants are presumably the ones exported by the
 * state machine defined in this file -- confirm. */
#define K_Cat(Cat) khmer_syllable_machine_ex_##Cat
enum khmer_syllable_type_t { enum khmer_syllable_type_t {
khmer_consonant_syllable, khmer_consonant_syllable,
khmer_broken_cluster, khmer_broken_cluster,

View File

@ -28,8 +28,8 @@
#ifndef HB_NO_OT_SHAPE #ifndef HB_NO_OT_SHAPE
#include "hb-ot-shaper-khmer.hh"
#include "hb-ot-shaper-khmer-machine.hh" #include "hb-ot-shaper-khmer-machine.hh"
#include "hb-ot-shaper-indic.hh"
#include "hb-ot-layout.hh" #include "hb-ot-layout.hh"
@ -79,6 +79,66 @@ enum {
KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */ KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
}; };
/* Works out the Khmer category of a single glyph and stores it in the
 * glyph's khmer_category() slot.
 *
 * The starting value comes from hb_indic_get_categories(): the low 8 bits
 * of its result are taken as the category and the bits above them as the
 * position.  A fixed set of code points is then forced into a specific
 * category, and anything still classed as an Indic matra (OT_M) is replaced
 * by one of the four positional vowel categories. */
static inline void
set_khmer_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t cp = info.codepoint;
  const unsigned int packed = hb_indic_get_categories (cp);

  khmer_category_t category = (khmer_category_t) (packed & 0xFFu);
  const int placement = (int) (indic_position_t) (packed >> 8);

  /*
   * Per-code-point overrides.
   *
   * These categories are experimentally extracted from what Uniscribe
   * allows.
   */
  switch (cp)
  {
    case 0x179Au:
      category = K_Cat(Ra);
      break;

    case 0x17C9u: case 0x17CAu: case 0x17CCu:
      category = K_Cat(Robatic);
      break;

    case 0x17C6u: case 0x17CBu: case 0x17CDu: case 0x17CEu:
    case 0x17CFu: case 0x17D0u: case 0x17D1u:
      category = K_Cat(Xgroup);
      break;

    case 0x17C7u: case 0x17C8u: case 0x17DDu:
    case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
      category = K_Cat(Ygroup);
      break;
  }

  /*
   * A generic Indic matra is narrowed down by its position; every other
   * category is stored untouched.
   */
  if (category == (khmer_category_t) OT_M /* Indic M */)
  {
    if (placement == POS_PRE_C)
      category = K_Cat(VPre);
    else if (placement == POS_BELOW_C)
      category = K_Cat(VBlw);
    else if (placement == POS_ABOVE_C)
      category = K_Cat(VAbv);
    else if (placement == POS_POST_C)
      category = K_Cat(VPst);
    else
      assert (0); /* A matra is expected to have one of the four positions. */
  }

  info.khmer_category() = category;
}
static void static void
setup_syllables_khmer (const hb_ot_shape_plan_t *plan, setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
hb_font_t *font, hb_font_t *font,
@ -231,11 +291,11 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
* the 'pref' OpenType feature applied to them. * the 'pref' OpenType feature applied to them.
* """ * """
*/ */
if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end) if (info[i].khmer_category() == K_Cat(Coeng) && num_coengs <= 2 && i + 1 < end)
{ {
num_coengs++; num_coengs++;
if (info[i + 1].khmer_category() == OT_Ra) if (info[i + 1].khmer_category() == K_Cat(Ra))
{ {
for (unsigned int j = 0; j < 2; j++) for (unsigned int j = 0; j < 2; j++)
info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF];
@ -263,7 +323,7 @@ reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
} }
/* Reorder left matra piece. */ /* Reorder left matra piece. */
else if (info[i].khmer_category() == OT_VPre) else if (info[i].khmer_category() == K_Cat(VPre))
{ {
/* Move to the start. */ /* Move to the start. */
buffer->merge_clusters (start, i + 1); buffer->merge_clusters (start, i + 1);
@ -302,8 +362,8 @@ reorder_khmer (const hb_ot_shape_plan_t *plan,
{ {
hb_syllabic_insert_dotted_circles (font, buffer, hb_syllabic_insert_dotted_circles (font, buffer,
khmer_broken_cluster, khmer_broken_cluster,
OT_DOTTEDCIRCLE, K_Cat(DOTTEDCIRCLE),
OT_Repha); (unsigned) -1);
foreach_syllable (buffer, start, end) foreach_syllable (buffer, start, end)
reorder_syllable_khmer (plan, font->face, buffer, start, end); reorder_syllable_khmer (plan, font->face, buffer, start, end);

View File

@ -1,115 +0,0 @@
/*
* Copyright © 2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPER_KHMER_HH
#define HB_OT_SHAPER_KHMER_HH
#include "hb.hh"
#include "hb-ot-shaper-indic.hh"
/* buffer var allocations */
/* The Khmer shaper has no accessor of its own: info.khmer_category()
 * expands to info.indic_category(). */
#define khmer_category() indic_category() /* khmer_category_t */
/* Khmer-only categories that set_khmer_properties() assigns to specific
 * code points.
 *
 * Note: This enum is duplicated in the -machine.rl source file.
 * Not sure how to avoid duplication. */
enum khmer_category_t
{
OT_Robatic = 20,
OT_Xgroup = 21,
OT_Ygroup = 22,
/* The positional vowel categories are left commented out, yet
 * set_khmer_properties() still refers to these names (with a cast), so they
 * are presumably declared with these values in hb-ot-shaper-indic.hh --
 * TODO confirm. */
//OT_VAbv = 26,
//OT_VBlw = 27,
//OT_VPre = 28,
//OT_VPst = 29,
};
/* Khmer has no position type of its own; it reuses indic_position_t. */
using khmer_position_t = indic_position_t;
/* Works out the Khmer category of a single glyph and stores it in the
 * glyph's khmer_category() slot.
 *
 * The starting value comes from hb_indic_get_categories(): the low 8 bits
 * of its result are taken as the category and the bits above them as the
 * position.  A fixed set of code points is then forced into a specific
 * category, and anything still classed as a matra (OT_M) is replaced by one
 * of the four positional vowel categories. */
static inline void
set_khmer_properties (hb_glyph_info_t &info)
{
  const hb_codepoint_t cp = info.codepoint;
  const unsigned int packed = hb_indic_get_categories (cp);

  khmer_category_t category = (khmer_category_t) (packed & 0xFFu);
  const khmer_position_t placement = (khmer_position_t) (packed >> 8);

  /*
   * Per-code-point overrides.
   *
   * These categories are experimentally extracted from what Uniscribe
   * allows.
   */
  switch (cp)
  {
    case 0x179Au:
      category = (khmer_category_t) OT_Ra;
      break;

    case 0x17C9u: case 0x17CAu: case 0x17CCu:
      category = OT_Robatic;
      break;

    case 0x17C6u: case 0x17CBu: case 0x17CDu: case 0x17CEu:
    case 0x17CFu: case 0x17D0u: case 0x17D1u:
      category = OT_Xgroup;
      break;

    case 0x17C7u: case 0x17C8u: case 0x17DDu:
    case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
      category = OT_Ygroup;
      break;
  }

  /* Anything that is not a matra is stored as it stands. */
  if (category != (khmer_category_t) OT_M)
  {
    info.khmer_category() = category;
    return;
  }

  /* A matra is narrowed down to a positional vowel category. */
  switch ((int) placement)
  {
    case POS_PRE_C:
      category = (khmer_category_t) OT_VPre;
      break;
    case POS_BELOW_C:
      category = (khmer_category_t) OT_VBlw;
      break;
    case POS_ABOVE_C:
      category = (khmer_category_t) OT_VAbv;
      break;
    case POS_POST_C:
      category = (khmer_category_t) OT_VPst;
      break;
    default:
      assert (0); /* A matra is expected to have one of the four positions. */
  }

  info.khmer_category() = category;
}
#endif /* HB_OT_SHAPER_KHMER_HH */

View File

@ -148,7 +148,6 @@ hb_base_sources = files(
'hb-ot-shaper-indic.cc', 'hb-ot-shaper-indic.cc',
'hb-ot-shaper-indic.hh', 'hb-ot-shaper-indic.hh',
'hb-ot-shaper-khmer.cc', 'hb-ot-shaper-khmer.cc',
'hb-ot-shaper-khmer.hh',
'hb-ot-shaper-myanmar.cc', 'hb-ot-shaper-myanmar.cc',
'hb-ot-shaper-myanmar.hh', 'hb-ot-shaper-myanmar.hh',
'hb-ot-shaper-syllabic.cc', 'hb-ot-shaper-syllabic.cc',