From 15ba4fbe01433c8627f9e6a60106ca77d3e1ad4c Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 13 Feb 2018 21:41:51 -0800 Subject: [PATCH] [khmer] Add dump-khmer-data --- src/Makefile.am | 4 + src/Makefile.sources | 1 + src/dump-khmer-data.cc | 43 ++++++++ src/hb-ot-shape-complex-khmer-private.hh | 124 +++++++++++++++++++++++ src/hb-ot-shape-complex-khmer.cc | 98 +----------------- 5 files changed, 173 insertions(+), 97 deletions(-) create mode 100644 src/dump-khmer-data.cc create mode 100644 src/hb-ot-shape-complex-khmer-private.hh diff --git a/src/Makefile.am b/src/Makefile.am index b3be138d5..f90b2ac2f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -353,11 +353,15 @@ endif check_PROGRAMS += \ dump-indic-data \ + dump-khmer-data \ dump-myanmar-data \ $(NULL) dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc dump_indic_data_CPPFLAGS = $(HBCFLAGS) dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS) +dump_khmer_data_SOURCES = dump-khmer-data.cc hb-ot-shape-complex-indic-table.cc +dump_khmer_data_CPPFLAGS = $(HBCFLAGS) +dump_khmer_data_LDADD = libharfbuzz.la $(HBLIBS) dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc dump_myanmar_data_CPPFLAGS = $(HBCFLAGS) dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS) diff --git a/src/Makefile.sources b/src/Makefile.sources index 376d543a5..ec60ec0a6 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -109,6 +109,7 @@ HB_OT_sources = \ hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic-private.hh \ hb-ot-shape-complex-indic-table.cc \ + hb-ot-shape-complex-khmer-private.hh \ hb-ot-shape-complex-khmer.cc \ hb-ot-shape-complex-myanmar-private.hh \ hb-ot-shape-complex-myanmar.cc \ diff --git a/src/dump-khmer-data.cc b/src/dump-khmer-data.cc new file mode 100644 index 000000000..7dd09b2b5 --- /dev/null +++ b/src/dump-khmer-data.cc @@ -0,0 +1,43 @@ +/* + * Copyright © 2018 Google, Inc. 
+ * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-khmer-private.hh" +/* Walk every code point from 0 through 0x10FFFF, run set_khmer_properties() on it, and print one line (code point, category, position) for each code point whose category is not INDIC_SYLLABIC_CATEGORY_OTHER or whose position is not INDIC_MATRA_CATEGORY_NOT_APPLICABLE. */ +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + hb_glyph_info_t info; + info.codepoint = u; + set_khmer_properties (info); + if (info.khmer_category() != INDIC_SYLLABIC_CATEGORY_OTHER || + info.khmer_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE) + printf("U+%04X %u %u\n", u, + info.khmer_category(), + info.khmer_position()); + } +} diff --git a/src/hb-ot-shape-complex-khmer-private.hh b/src/hb-ot-shape-complex-khmer-private.hh new file mode 100644 index 000000000..f90ef9674 --- /dev/null +++ b/src/hb-ot-shape-complex-khmer-private.hh @@ -0,0 +1,124 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. 
+ * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH +#define HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH + +#include "hb-private.hh" + +#include "hb-ot-shape-complex-indic-private.hh" + + +/* buffer var allocations */ +#define khmer_category() indic_category() /* khmer_category_t */ +#define khmer_position() indic_position() /* khmer_position_t */ + + +typedef indic_category_t khmer_category_t; +typedef indic_position_t khmer_position_t; + +/* Map a matra side to its Khmer position: POS_PRE_C becomes POS_PRE_M; POS_POST_C, POS_ABOVE_C and POS_BELOW_C all become POS_AFTER_POST; any other value is returned unchanged. */ +static inline khmer_position_t +matra_position_khmer (khmer_position_t side) +{ + switch ((int) side) + { + case POS_PRE_C: + return POS_PRE_M; + + case POS_POST_C: + case POS_ABOVE_C: + case POS_BELOW_C: + return POS_AFTER_POST; + + default: + return side; + }; +} +/* Returns the result of is_one_of() for info against CONSONANT_FLAGS combined with the OT_V flag. */ +static inline bool +is_consonant_or_vowel (const hb_glyph_info_t &info) +{ + return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V)); +} +/* Returns the result of is_one_of() for info against the OT_Coeng flag. */ +static inline bool +is_coeng (const hb_glyph_info_t &info) +{ + return is_one_of (info, FLAG (OT_Coeng)); +} +/* Classify info.codepoint for the Khmer shaper: fetch the packed value from hb_indic_get_categories(), split it into a category and a position, apply the Khmer-specific overrides in the two sections of the body, and store both results back into info through khmer_category() and khmer_position(). */ +static 
inline void +set_khmer_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); /* category: low seven bits of the packed value */ + khmer_position_t pos = (khmer_position_t) (type >> 8); /* position: the bits from bit 8 upwards */ + + + /* + * Re-assign category + */ + + if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ + else if (unlikely (hb_in_range (u, 0x17CDu, 0x17D1u) || + u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ + { + /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. + * https://github.com/roozbehp/unicode-data/issues/5 */ + cat = OT_M; + pos = POS_ABOVE_C; + } + else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER; + else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; + + + /* + * Re-assign position. + */ + + if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) + { + pos = POS_BASE_C; + if (u == 0x179Au) + cat = OT_Ra; + } + else if (cat == OT_M) + { + pos = matra_position_khmer (pos); + } + else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) + { + pos = POS_SMVD; + } + + info.khmer_category() = cat; + info.khmer_position() = pos; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc index 0e2ca88cb..304879d8f 100644 --- a/src/hb-ot-shape-complex-khmer.cc +++ b/src/hb-ot-shape-complex-khmer.cc @@ -24,105 +24,9 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-complex-indic-private.hh" +#include "hb-ot-shape-complex-khmer-private.hh" #include "hb-ot-layout-private.hh" -/* buffer var allocations */ -#define khmer_category() indic_category() /* khmer_category_t */ -#define khmer_position() indic_position() /* khmer_position_t */ - - -/* - * Khmer shaper. 
- */ - -typedef indic_category_t khmer_category_t; -typedef indic_position_t khmer_position_t; - - -static inline khmer_position_t -matra_position_khmer (khmer_position_t side) -{ - switch ((int) side) - { - case POS_PRE_C: - return POS_PRE_M; - - case POS_POST_C: - case POS_ABOVE_C: - case POS_BELOW_C: - return POS_AFTER_POST; - - default: - return side; - }; -} - -static inline bool -is_consonant_or_vowel (const hb_glyph_info_t &info) -{ - return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V)); -} - -static inline bool -is_coeng (const hb_glyph_info_t &info) -{ - return is_one_of (info, FLAG (OT_Coeng)); -} - -static inline void -set_khmer_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); - khmer_position_t pos = (khmer_position_t) (type >> 8); - - - /* - * Re-assign category - */ - - if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ - else if (unlikely (hb_in_range (u, 0x17CDu, 0x17D1u) || - u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ - { - /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. - * https://github.com/roozbehp/unicode-data/issues/5 */ - cat = OT_M; - pos = POS_ABOVE_C; - } - else if (unlikely (hb_in_range (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER; - else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; - - - /* - * Re-assign position. - */ - - if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) - { - pos = POS_BASE_C; - if (u == 0x179Au) - cat = OT_Ra; - } - else if (cat == OT_M) - { - pos = matra_position_khmer (pos); - } - else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) - { - pos = POS_SMVD; - } - - info.khmer_category() = cat; - info.khmer_position() = pos; -} - -/* - * Things above this line should ideally be moved to the Indic table itself. 
- */ - /* * Khmer shaper.