Insert a dotted circle within <U+0B85, U+0BC2>

This commit is contained in:
David Corbett 2019-11-08 20:59:48 -05:00 committed by Behdad Esfahbod
parent 64a45be519
commit b372c3e956
4 changed files with 40 additions and 111 deletions

View File

@ -1,97 +0,0 @@
# Copied from https://docs.microsoft.com/en-us/typography/script-development/use
# On October 23, 2018; with document dated 02/07/2018.
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI

View File

@ -290,7 +290,7 @@ ucd-table: gen-ucd-table.py ucd.nounihan.grouped.zip hb-common.h
use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
vowel-constraints: gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt
vowel-constraints: gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false)

View File

@ -25,7 +25,7 @@ import io
import sys
if len (sys.argv) != 3:
print ('usage: ./gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt', file=sys.stderr)
print ('usage: ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt', file=sys.stderr)
sys.exit (1)
with io.open (sys.argv[2], encoding='utf-8') as f:
@ -84,7 +84,8 @@ class ConstraintSet (object):
else:
self._c[first] = ConstraintSet (rest)
def _indent (self, depth):
@staticmethod
def _indent (depth):
return (' ' * depth).replace (' ', '\t')
def __str__ (self, index=0, depth=4):
@ -92,17 +93,20 @@ class ConstraintSet (object):
indent = self._indent (depth)
if isinstance (self._c, list):
if len (self._c) == 0:
assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented'
s.append ('{}matched = true;\n'.format (indent))
elif len (self._c) == 1:
assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented'
s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, next (iter (self._c)), index or ''))
else:
s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index))
s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), len (self._c)))
s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index or ''))
if index:
s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), index + 1))
for i, cp in enumerate (self._c[1:], start=1):
s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
self._indent (depth + 2), cp, index + i, ')' if i == len (self._c) - 1 else ' &&'))
s.append ('{}{{\n'.format (indent))
for i in range (len (self._c)):
for i in range (index + 1):
s.append ('{}buffer->next_glyph ();\n'.format (self._indent (depth + 1)))
s.append ('{}_output_dotted_circle (buffer);\n'.format (self._indent (depth + 1)))
s.append ('{}}}\n'.format (indent))
@ -128,7 +132,12 @@ class ConstraintSet (object):
constraints = {}
with io.open (sys.argv[1], encoding='utf-8') as f:
constraints_header = [f.readline ().strip () for i in range (2)]
constraints_header = []
while True:
line = f.readline ().strip ()
if line == '#':
break
constraints_header.append(line)
for line in f:
j = line.find ('#')
if j >= 0:
@ -147,7 +156,7 @@ print ('/* == Start of generated functions == */')
print ('/*')
print (' * The following functions are generated by running:')
print (' *')
print (' * %s use Scripts.txt' % sys.argv[0])
print (' * %s IndicShapingInvalidCluster.txt Scripts.txt' % sys.argv[0])
print (' *')
print (' * on files with these headers:')
print (' *')
@ -185,7 +194,7 @@ print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB
print ('\t\t\t\t hb_buffer_t *buffer,')
print ('\t\t\t\t hb_font_t *font HB_UNUSED)')
print ('{')
print ('#if defined(HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS)')
print ('#ifdef HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS')
print (' return;')
print ('#endif')
print (' if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)')

View File

@ -2,15 +2,16 @@
/*
* The following functions are generated by running:
*
* ./gen-vowel-constraints.py use Scripts.txt
* ./gen-vowel-constraints.py IndicShapingInvalidCluster.txt Scripts.txt
*
* on files with these headers:
*
* # Copied from https://docs.microsoft.com/en-us/typography/script-development/use
* # On October 23, 2018; with documentd dated 02/07/2018.
* # IndicShapingInvalidCluster.txt
* # Date: 2015-03-12, 21:17:00 GMT [AG]
* # Date: 2019-11-08, 23:22:00 GMT [AG]
*
* # Scripts-12.0.0.txt
* # Date: 2019-01-28, 22:16:47 GMT
* # Scripts-12.1.0.txt
* # Date: 2019-04-01, 09:10:42 GMT
*/
#include "hb.hh"
@ -211,6 +212,22 @@ _hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,
processed = true;
break;
case HB_SCRIPT_TAMIL:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
if (0x0B85u == buffer->cur ().codepoint &&
0x0BC2u == buffer->cur (1).codepoint)
{
buffer->next_glyph ();
_output_dotted_circle (buffer);
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TELUGU:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{