From effddd03bd6fb0aac14b46a16b281f3749e44780 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 13 Feb 2018 21:38:15 -0800 Subject: [PATCH] [myanmar] Add dump-myanmar-data --- src/Makefile.am | 4 + src/Makefile.sources | 1 + src/dump-myanmar-data.cc | 43 ++++++ src/hb-ot-shape-complex-indic-private.hh | 8 +- src/hb-ot-shape-complex-myanmar-private.hh | 171 +++++++++++++++++++++ src/hb-ot-shape-complex-myanmar.cc | 138 +---------------- 6 files changed, 225 insertions(+), 140 deletions(-) create mode 100644 src/dump-myanmar-data.cc create mode 100644 src/hb-ot-shape-complex-myanmar-private.hh diff --git a/src/Makefile.am b/src/Makefile.am index 571ec1134..b3be138d5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -353,10 +353,14 @@ endif check_PROGRAMS += \ dump-indic-data \ + dump-myanmar-data \ $(NULL) dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc dump_indic_data_CPPFLAGS = $(HBCFLAGS) dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS) +dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc +dump_myanmar_data_CPPFLAGS = $(HBCFLAGS) +dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += test-ot-tag TESTS += test-ot-tag diff --git a/src/Makefile.sources b/src/Makefile.sources index f412a65d2..376d543a5 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -110,6 +110,7 @@ HB_OT_sources = \ hb-ot-shape-complex-indic-private.hh \ hb-ot-shape-complex-indic-table.cc \ hb-ot-shape-complex-khmer.cc \ + hb-ot-shape-complex-myanmar-private.hh \ hb-ot-shape-complex-myanmar.cc \ hb-ot-shape-complex-thai.cc \ hb-ot-shape-complex-tibetan.cc \ diff --git a/src/dump-myanmar-data.cc b/src/dump-myanmar-data.cc new file mode 100644 index 000000000..2df9cd987 --- /dev/null +++ b/src/dump-myanmar-data.cc @@ -0,0 +1,43 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-myanmar-private.hh" + +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + hb_glyph_info_t info; + info.codepoint = u; + set_myanmar_properties (info); + if (info.myanmar_category() != INDIC_SYLLABIC_CATEGORY_OTHER || + info.myanmar_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE) + printf("U+%04X %u %u\n", u, + info.myanmar_category(), + info.myanmar_position()); + } +} diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 34a9653dd..867b93627 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -34,6 +34,11 @@ #include "hb-ot-shape-private.hh" /* XXX Remove */ +/* buffer var allocations */ +#define indic_category() complex_var_u8_0() /* indic_category_t */ +#define indic_position() complex_var_u8_1() /* indic_position_t */ + + #define INDIC_TABLE_ELEMENT_TYPE uint16_t /* Cateories used in the OpenType spec: @@ -186,9 +191,6 @@ enum indic_matra_category_t { HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE hb_indic_get_categories (hb_codepoint_t u); -/* buffer var allocations */ -#define indic_category() complex_var_u8_0() /* indic_category_t */ -#define indic_position() complex_var_u8_1() /* indic_position_t */ static inline bool is_one_of (const hb_glyph_info_t &info, unsigned int flags) diff --git a/src/hb-ot-shape-complex-myanmar-private.hh b/src/hb-ot-shape-complex-myanmar-private.hh new file mode 100644 index 000000000..04f81bd12 --- /dev/null +++ b/src/hb-ot-shape-complex-myanmar-private.hh @@ -0,0 +1,171 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH +#define HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH + +#include "hb-private.hh" + +#include "hb-ot-shape-complex-indic-private.hh" + + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + + +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum myanmar_category_t { + OT_As = 18, /* Asat */ + OT_D0 = 20, /* Digit zero */ + OT_DB = OT_N, /* Dot below */ + OT_GB = OT_PLACEHOLDER, + OT_MH = 21, /* Various consonant medial types */ + OT_MR = 22, /* Various consonant medial types */ + OT_MW = 23, /* Various consonant medial types */ + OT_MY = 24, /* Various consonant medial types */ + OT_PT = 25, /* Pwo and other tones */ + OT_VAbv = 26, + OT_VBlw = 27, + OT_VPre = 28, + OT_VPst = 29, + OT_VS = 30, /* Variation selectors */ + OT_P = 31, /* Punctuation */ + OT_D = 32, /* Digits except zero */ +}; + + +static inline void +set_myanmar_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + indic_category_t cat = (indic_category_t) (type & 0x7Fu); + indic_position_t pos = (indic_position_t) (type >> 8); + + /* Myanmar + * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze + */ + if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu))) + cat = (indic_category_t) OT_VS; + + switch (u) + { + case 0x104Eu: + cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ + break; + + case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: + case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: + case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: + case 0x25FEu: + cat = (indic_category_t) OT_GB; + break; + + case 0x1004u: case 0x101Bu: case 0x105Au: + cat = (indic_category_t) OT_Ra; + break; + + case 0x1032u: case 0x1036u: + cat = (indic_category_t) OT_A; + break; + + case 0x1039u: + cat = (indic_category_t) OT_H; + break; + + case 0x103Au: + cat = (indic_category_t) OT_As; + break; + + case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: + case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: + case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: + case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: + case 0x1097u: case 0x1098u: case 0x1099u: + cat = (indic_category_t) OT_D; + break; + + case 0x1040u: + cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ + break; + + case 0x103Eu: case 0x1060u: + cat = (indic_category_t) OT_MH; + break; + + case 0x103Cu: + cat = (indic_category_t) OT_MR; + break; + + case 0x103Du: case 0x1082u: + cat = (indic_category_t) OT_MW; + break; + + case 0x103Bu: case 0x105Eu: case 0x105Fu: + cat = (indic_category_t) OT_MY; + break; + + case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: + case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: + cat = (indic_category_t) OT_PT; + break; + + case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: + case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: + case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: + cat = (indic_category_t) OT_SM; + break; + + case 0x104Au: case 0x104Bu: + cat = (indic_category_t) OT_P; + break; + + case 0xAA74u: case 0xAA75u: case 0xAA76u: + /* https://github.com/roozbehp/unicode-data/issues/3 */ + cat = (indic_category_t) OT_C; + break; + } + + if (cat == OT_M) + { + switch ((int) pos) + { + case POS_PRE_C: cat = (indic_category_t) OT_VPre; + pos = POS_PRE_M; break; + case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break; + case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break; + case POS_POST_C: cat = (indic_category_t) OT_VPst; break; + } + } + + info.myanmar_category() = (myanmar_category_t) cat; + info.myanmar_position() = pos; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc index dacb1abe0..3c57bc1ff 100644 --- a/src/hb-ot-shape-complex-myanmar.cc +++ b/src/hb-ot-shape-complex-myanmar.cc @@ -24,11 +24,7 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-complex-indic-private.hh" - -/* buffer var allocations */ -#define myanmar_category() indic_category() /* myanmar_category_t */ -#define myanmar_position() indic_position() /* myanmar_position_t */ +#include "hb-ot-shape-complex-myanmar-private.hh" /* @@ -127,138 +123,6 @@ enum syllable_type_t { #include "hb-ot-shape-complex-myanmar-machine.hh" -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. */ -enum myanmar_category_t { - OT_As = 18, /* Asat */ - OT_D0 = 20, /* Digit zero */ - OT_DB = OT_N, /* Dot below */ - OT_GB = OT_PLACEHOLDER, - OT_MH = 21, /* Various consonant medial types */ - OT_MR = 22, /* Various consonant medial types */ - OT_MW = 23, /* Various consonant medial types */ - OT_MY = 24, /* Various consonant medial types */ - OT_PT = 25, /* Pwo and other tones */ - OT_VAbv = 26, - OT_VBlw = 27, - OT_VPre = 28, - OT_VPst = 29, - OT_VS = 30, /* Variation selectors */ - OT_P = 31, /* Punctuation */ - OT_D = 32, /* Digits except zero */ -}; - - -static inline void -set_myanmar_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - indic_category_t cat = (indic_category_t) (type & 0x7Fu); - indic_position_t pos = (indic_position_t) (type >> 8); - - /* Myanmar - * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze - */ - if (unlikely (hb_in_range (u, 0xFE00u, 0xFE0Fu))) - cat = (indic_category_t) OT_VS; - - switch (u) - { - case 0x104Eu: - cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ - break; - - case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: - case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: - case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: - case 0x25FEu: - cat = (indic_category_t) OT_GB; - break; - - case 0x1004u: case 0x101Bu: case 0x105Au: - cat = (indic_category_t) OT_Ra; - break; - - case 0x1032u: case 0x1036u: - cat = (indic_category_t) OT_A; - break; - - case 0x1039u: - cat = (indic_category_t) OT_H; - break; - - case 0x103Au: - cat = (indic_category_t) OT_As; - break; - - case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: - case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: - case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: - case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: - case 0x1097u: case 0x1098u: case 0x1099u: - cat = (indic_category_t) OT_D; - break; - - case 0x1040u: - cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ - break; - - case 0x103Eu: case 0x1060u: - cat = (indic_category_t) OT_MH; - break; - - case 0x103Cu: - cat = (indic_category_t) OT_MR; - break; - - case 0x103Du: case 0x1082u: - cat = (indic_category_t) OT_MW; - break; - - case 0x103Bu: case 0x105Eu: case 0x105Fu: - cat = (indic_category_t) OT_MY; - break; - - case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: - case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: - cat = (indic_category_t) OT_PT; - break; - - case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: - case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: - case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: - cat = (indic_category_t) OT_SM; - break; - - case 0x104Au: case 0x104Bu: - cat = (indic_category_t) OT_P; - break; - - case 0xAA74u: case 0xAA75u: case 0xAA76u: - /* https://github.com/roozbehp/unicode-data/issues/3 */ - cat = (indic_category_t) OT_C; - break; - } - - if (cat == OT_M) - { - switch ((int) pos) - { - case POS_PRE_C: cat = (indic_category_t) OT_VPre; - pos = POS_PRE_M; break; - case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break; - case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break; - case POS_POST_C: cat = (indic_category_t) OT_VPst; break; - } - } - - info.myanmar_category() = (myanmar_category_t) cat; - info.myanmar_position() = pos; -} - - - static void setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer,