Add ms-use/IndicShapingInvalidCluster.txt

This commit is contained in:
David Corbett 2019-11-11 17:39:55 -05:00 committed by Behdad Esfahbod
parent b372c3e956
commit e6351d9b2c
5 changed files with 130 additions and 4 deletions

View File

@ -290,7 +290,7 @@ ucd-table: gen-ucd-table.py ucd.nounihan.grouped.zip hb-common.h
use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false) || ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
vowel-constraints: gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt vowel-constraints: gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \ $(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false) || ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false)

View File

@ -25,7 +25,7 @@ import io
import sys import sys
if len (sys.argv) != 3: if len (sys.argv) != 3:
print ('usage: ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr) print ('usage: ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr)
sys.exit (1) sys.exit (1)
with io.open (sys.argv[2], encoding='utf-8') as f: with io.open (sys.argv[2], encoding='utf-8') as f:
@ -156,7 +156,7 @@ print ('/* == Start of generated functions == */')
print ('/*') print ('/*')
print (' * The following functions are generated by running:') print (' * The following functions are generated by running:')
print (' *') print (' *')
print (' * %s IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0]) print (' * %s ms-use/IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0])
print (' *') print (' *')
print (' * on files with these headers:') print (' * on files with these headers:')
print (' *') print (' *')

View File

@ -2,7 +2,7 @@
/* /*
* The following functions are generated by running: * The following functions are generated by running:
* *
* ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt * ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt
* *
* on files with these headers: * on files with these headers:
* *

21
src/ms-use/COPYING Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE

View File

@ -0,0 +1,105 @@
# IndicShapingInvalidCluster.txt
# Date: 2015-03-12, 21:17:00 GMT [AG]
# Date: 2019-11-08, 23:22:00 GMT [AG]
#
# This file defines the following property:
#
# Indic_Shaping_Invalid_Cluster
#
# Scope: This file enumerates sequences of characters that should be treated as invalid clusters
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI