diff --git a/src/HBIndicVowelConstraints.txt b/src/HBIndicVowelConstraints.txt deleted file mode 100644 index 146ae1cb8..000000000 --- a/src/HBIndicVowelConstraints.txt +++ /dev/null @@ -1,97 +0,0 @@ -# Copied from https://docs.microsoft.com/en-us/typography/script-development/use -# On October 23, 2018; with documentd dated 02/07/2018. - - 0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E - 0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA - 0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I - 0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U - 090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E - 090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E - 090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E - 0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O - 0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E - 0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O - 0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E - 0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O - 0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E - 0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU - 0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI - 0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E - 0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE - 0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE - 0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE - 0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW - 0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE - 0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE - 0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA - 098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R - 098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L - 0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA - 0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I - 0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II - 0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U - 0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU - 0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE - 0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI - 0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO - 0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU - 0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA - 0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E - 0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E - 0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI - 0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O - 0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O - 0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E - 0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU - 0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI - 0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA - 0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA - 0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK - 0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK - 0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK - 0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU - 0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK - 0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK - 0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK - 0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA - 0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU - 0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA - 0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK - 0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK - 0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E - 0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA - 0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK - 0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA - 0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA - 0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA - 0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA - 0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA - 0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA - 0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA - 0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA - 0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA - 0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA - 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA - 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA - 0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA - 11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA - 1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R - 1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E - 11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA - 11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E - 11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O - 11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU - 112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA - 112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E - 112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI - 112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O - 112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU - 11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA - 114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R - 114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR - 1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E - 1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E - 11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E - 11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI - 11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E - 11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI diff --git a/src/Makefile.am b/src/Makefile.am index a76d96858..29563c6cb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -290,7 +290,7 @@ ucd-table: gen-ucd-table.py ucd.nounihan.grouped.zip hb-common.h use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) -vowel-constraints: gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt +vowel-constraints: gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \ || ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false) diff --git a/src/gen-vowel-constraints.py b/src/gen-vowel-constraints.py index 8ca90c819..190c0412b 100755 --- a/src/gen-vowel-constraints.py +++ b/src/gen-vowel-constraints.py @@ -25,7 +25,7 @@ import io import sys if len (sys.argv) != 3: - print ('usage: ./gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt', file=sys.stderr) + print ('usage: ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr) sys.exit (1) with io.open (sys.argv[2], encoding='utf-8') as f: @@ -84,7 +84,8 @@ class ConstraintSet (object): else: self._c[first] = ConstraintSet (rest) - def _indent (self, depth): + @staticmethod + def _indent (depth): return (' ' * depth).replace (' ', '\t') def __str__ (self, index=0, depth=4): @@ -92,17 +93,20 @@ class ConstraintSet (object): indent = self._indent (depth) if isinstance (self._c, list): if len (self._c) == 0: + assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented' s.append ('{}matched = true;\n'.format (indent)) elif len (self._c) == 1: + assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented' s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, next (iter (self._c)), index or '')) else: - s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index)) - s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), len (self._c))) + s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index or '')) + if index: + s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), index + 1)) for i, cp in enumerate (self._c[1:], start=1): s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format ( self._indent (depth + 2), cp, index + i, ')' if i == len (self._c) - 1 else ' &&')) s.append ('{}{{\n'.format (indent)) - for i in range (len (self._c)): + for i in range (index + 1): s.append ('{}buffer->next_glyph ();\n'.format (self._indent (depth + 1))) s.append ('{}_output_dotted_circle (buffer);\n'.format (self._indent (depth + 1))) s.append ('{}}}\n'.format (indent)) @@ -128,7 +132,12 @@ class ConstraintSet (object): constraints = {} with io.open (sys.argv[1], encoding='utf-8') as f: - constraints_header = [f.readline ().strip () for i in range (2)] + constraints_header = [] + while True: + line = f.readline ().strip () + if line == '#': + break + constraints_header.append(line) for line in f: j = line.find ('#') if j >= 0: @@ -147,7 +156,7 @@ print ('/* == Start of generated functions == */') print ('/*') print (' * The following functions are generated by running:') print (' *') -print (' * %s use Scripts.txt' % sys.argv[0]) +print (' * %s IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0]) print (' *') print (' * on files with these headers:') print (' *') @@ -185,7 +194,7 @@ print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB print ('\t\t\t\t hb_buffer_t *buffer,') print ('\t\t\t\t hb_font_t *font HB_UNUSED)') print ('{') -print ('#if defined(HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS)') +print ('#ifdef HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS') print (' return;') print ('#endif') print (' if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)') diff --git a/src/hb-ot-shape-complex-vowel-constraints.cc b/src/hb-ot-shape-complex-vowel-constraints.cc index 2f8041323..b7e6f4f06 100644 --- a/src/hb-ot-shape-complex-vowel-constraints.cc +++ b/src/hb-ot-shape-complex-vowel-constraints.cc @@ -2,15 +2,16 @@ /* * The following functions are generated by running: * - * ./gen-vowel-constraints.py use Scripts.txt + * ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt * * on files with these headers: * - * # Copied from https://docs.microsoft.com/en-us/typography/script-development/use - * # On October 23, 2018; with documentd dated 02/07/2018. + * # IndicShapingInvalidCluster.txt + * # Date: 2015-03-12, 21:17:00 GMT [AG] + * # Date: 2019-11-08, 23:22:00 GMT [AG] * - * # Scripts-12.0.0.txt - * # Date: 2019-01-28, 22:16:47 GMT + * # Scripts-12.1.0.txt + * # Date: 2019-04-01, 09:10:42 GMT */ #include "hb.hh" @@ -211,6 +212,22 @@ _hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED, processed = true; break; + case HB_SCRIPT_TAMIL: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + if (0x0B85u == buffer->cur ().codepoint && + 0x0BC2u == buffer->cur (1).codepoint) + { + buffer->next_glyph (); + _output_dotted_circle (buffer); + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + case HB_SCRIPT_TELUGU: for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) {