[khmer] Add dump-khmer-data
This commit is contained in:
parent
effddd03bd
commit
15ba4fbe01
|
@ -353,11 +353,15 @@ endif
|
||||||
|
|
||||||
check_PROGRAMS += \
|
check_PROGRAMS += \
|
||||||
dump-indic-data \
|
dump-indic-data \
|
||||||
|
dump-khmer-data \
|
||||||
dump-myanmar-data \
|
dump-myanmar-data \
|
||||||
$(NULL)
|
$(NULL)
|
||||||
dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc
|
dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc
|
||||||
dump_indic_data_CPPFLAGS = $(HBCFLAGS)
|
dump_indic_data_CPPFLAGS = $(HBCFLAGS)
|
||||||
dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS)
|
dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS)
|
||||||
|
dump_khmer_data_SOURCES = dump-khmer-data.cc hb-ot-shape-complex-indic-table.cc
|
||||||
|
dump_khmer_data_CPPFLAGS = $(HBCFLAGS)
|
||||||
|
dump_khmer_data_LDADD = libharfbuzz.la $(HBLIBS)
|
||||||
dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc
|
dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc
|
||||||
dump_myanmar_data_CPPFLAGS = $(HBCFLAGS)
|
dump_myanmar_data_CPPFLAGS = $(HBCFLAGS)
|
||||||
dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS)
|
dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS)
|
||||||
|
|
|
@ -109,6 +109,7 @@ HB_OT_sources = \
|
||||||
hb-ot-shape-complex-indic.cc \
|
hb-ot-shape-complex-indic.cc \
|
||||||
hb-ot-shape-complex-indic-private.hh \
|
hb-ot-shape-complex-indic-private.hh \
|
||||||
hb-ot-shape-complex-indic-table.cc \
|
hb-ot-shape-complex-indic-table.cc \
|
||||||
|
hb-ot-shape-complex-khmer-private.hh \
|
||||||
hb-ot-shape-complex-khmer.cc \
|
hb-ot-shape-complex-khmer.cc \
|
||||||
hb-ot-shape-complex-myanmar-private.hh \
|
hb-ot-shape-complex-myanmar-private.hh \
|
||||||
hb-ot-shape-complex-myanmar.cc \
|
hb-ot-shape-complex-myanmar.cc \
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2018 Google, Inc.
|
||||||
|
*
|
||||||
|
* This is part of HarfBuzz, a text shaping library.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
* license or royalty fees, to use, copy, modify, and distribute this
|
||||||
|
* software and its documentation for any purpose, provided that the
|
||||||
|
* above copyright notice and the following two paragraphs appear in
|
||||||
|
* all copies of this software.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||||
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||||
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||||
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||||
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Google Author(s): Behdad Esfahbod
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "hb-ot-shape-complex-khmer-private.hh"
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++)
|
||||||
|
{
|
||||||
|
hb_glyph_info_t info;
|
||||||
|
info.codepoint = u;
|
||||||
|
set_khmer_properties (info);
|
||||||
|
if (info.khmer_category() != INDIC_SYLLABIC_CATEGORY_OTHER ||
|
||||||
|
info.khmer_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE)
|
||||||
|
printf("U+%04X %u %u\n", u,
|
||||||
|
info.khmer_category(),
|
||||||
|
info.khmer_position());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,124 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2018 Google, Inc.
|
||||||
|
*
|
||||||
|
* This is part of HarfBuzz, a text shaping library.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
* license or royalty fees, to use, copy, modify, and distribute this
|
||||||
|
* software and its documentation for any purpose, provided that the
|
||||||
|
* above copyright notice and the following two paragraphs appear in
|
||||||
|
* all copies of this software.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||||
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||||
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||||
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||||
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Google Author(s): Behdad Esfahbod
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH
|
||||||
|
#define HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH
|
||||||
|
|
||||||
|
#include "hb-private.hh"
|
||||||
|
|
||||||
|
#include "hb-ot-shape-complex-indic-private.hh"
|
||||||
|
|
||||||
|
|
||||||
|
/* buffer var allocations */
|
||||||
|
#define khmer_category() indic_category() /* khmer_category_t */
|
||||||
|
#define khmer_position() indic_position() /* khmer_position_t */
|
||||||
|
|
||||||
|
|
||||||
|
typedef indic_category_t khmer_category_t;
|
||||||
|
typedef indic_position_t khmer_position_t;
|
||||||
|
|
||||||
|
|
||||||
|
static inline khmer_position_t
|
||||||
|
matra_position_khmer (khmer_position_t side)
|
||||||
|
{
|
||||||
|
switch ((int) side)
|
||||||
|
{
|
||||||
|
case POS_PRE_C:
|
||||||
|
return POS_PRE_M;
|
||||||
|
|
||||||
|
case POS_POST_C:
|
||||||
|
case POS_ABOVE_C:
|
||||||
|
case POS_BELOW_C:
|
||||||
|
return POS_AFTER_POST;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return side;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_consonant_or_vowel (const hb_glyph_info_t &info)
|
||||||
|
{
|
||||||
|
return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_coeng (const hb_glyph_info_t &info)
|
||||||
|
{
|
||||||
|
return is_one_of (info, FLAG (OT_Coeng));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
set_khmer_properties (hb_glyph_info_t &info)
|
||||||
|
{
|
||||||
|
hb_codepoint_t u = info.codepoint;
|
||||||
|
unsigned int type = hb_indic_get_categories (u);
|
||||||
|
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
|
||||||
|
khmer_position_t pos = (khmer_position_t) (type >> 8);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Re-assign category
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
|
||||||
|
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
|
||||||
|
u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
|
||||||
|
{
|
||||||
|
/* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
|
||||||
|
* https://github.com/roozbehp/unicode-data/issues/5 */
|
||||||
|
cat = OT_M;
|
||||||
|
pos = POS_ABOVE_C;
|
||||||
|
}
|
||||||
|
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
|
||||||
|
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Re-assign position.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
||||||
|
{
|
||||||
|
pos = POS_BASE_C;
|
||||||
|
if (u == 0x179Au)
|
||||||
|
cat = OT_Ra;
|
||||||
|
}
|
||||||
|
else if (cat == OT_M)
|
||||||
|
{
|
||||||
|
pos = matra_position_khmer (pos);
|
||||||
|
}
|
||||||
|
else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
|
||||||
|
{
|
||||||
|
pos = POS_SMVD;
|
||||||
|
}
|
||||||
|
|
||||||
|
info.khmer_category() = cat;
|
||||||
|
info.khmer_position() = pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH */
|
|
@ -24,105 +24,9 @@
|
||||||
* Google Author(s): Behdad Esfahbod
|
* Google Author(s): Behdad Esfahbod
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "hb-ot-shape-complex-indic-private.hh"
|
#include "hb-ot-shape-complex-khmer-private.hh"
|
||||||
#include "hb-ot-layout-private.hh"
|
#include "hb-ot-layout-private.hh"
|
||||||
|
|
||||||
/* buffer var allocations */
|
|
||||||
#define khmer_category() indic_category() /* khmer_category_t */
|
|
||||||
#define khmer_position() indic_position() /* khmer_position_t */
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Khmer shaper.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef indic_category_t khmer_category_t;
|
|
||||||
typedef indic_position_t khmer_position_t;
|
|
||||||
|
|
||||||
|
|
||||||
static inline khmer_position_t
|
|
||||||
matra_position_khmer (khmer_position_t side)
|
|
||||||
{
|
|
||||||
switch ((int) side)
|
|
||||||
{
|
|
||||||
case POS_PRE_C:
|
|
||||||
return POS_PRE_M;
|
|
||||||
|
|
||||||
case POS_POST_C:
|
|
||||||
case POS_ABOVE_C:
|
|
||||||
case POS_BELOW_C:
|
|
||||||
return POS_AFTER_POST;
|
|
||||||
|
|
||||||
default:
|
|
||||||
return side;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_consonant_or_vowel (const hb_glyph_info_t &info)
|
|
||||||
{
|
|
||||||
return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_coeng (const hb_glyph_info_t &info)
|
|
||||||
{
|
|
||||||
return is_one_of (info, FLAG (OT_Coeng));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
set_khmer_properties (hb_glyph_info_t &info)
|
|
||||||
{
|
|
||||||
hb_codepoint_t u = info.codepoint;
|
|
||||||
unsigned int type = hb_indic_get_categories (u);
|
|
||||||
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
|
|
||||||
khmer_position_t pos = (khmer_position_t) (type >> 8);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Re-assign category
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
|
|
||||||
u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
|
|
||||||
{
|
|
||||||
/* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
|
|
||||||
* https://github.com/roozbehp/unicode-data/issues/5 */
|
|
||||||
cat = OT_M;
|
|
||||||
pos = POS_ABOVE_C;
|
|
||||||
}
|
|
||||||
else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
|
|
||||||
else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Re-assign position.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
|
|
||||||
{
|
|
||||||
pos = POS_BASE_C;
|
|
||||||
if (u == 0x179Au)
|
|
||||||
cat = OT_Ra;
|
|
||||||
}
|
|
||||||
else if (cat == OT_M)
|
|
||||||
{
|
|
||||||
pos = matra_position_khmer (pos);
|
|
||||||
}
|
|
||||||
else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
|
|
||||||
{
|
|
||||||
pos = POS_SMVD;
|
|
||||||
}
|
|
||||||
|
|
||||||
info.khmer_category() = cat;
|
|
||||||
info.khmer_position() = pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Things above this line should ideally be moved to the Indic table itself.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Khmer shaper.
|
* Khmer shaper.
|
||||||
|
|
Loading…
Reference in New Issue