Touch up on previous commit

https://github.com/harfbuzz/harfbuzz/pull/1273
This commit is contained in:
Behdad Esfahbod 2018-10-23 02:51:42 -07:00
parent 205737acdc
commit 6d40eb8372
8 changed files with 606 additions and 523 deletions

View File

@ -0,0 +1,97 @@
# Copied from https://docs.microsoft.com/en-us/typography/script-development/use
# On October 23, 2018; with document dated 02/07/2018.
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI

View File

@ -317,9 +317,9 @@ use-table: gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.tx
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-use-table.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-use-table.cc; false)
vowel-constraints: gen-vowel-constraints.py use Scripts.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.hh \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.hh; false)
vowel-constraints: gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc \
|| ($(RM) $(srcdir)/hb-ot-shape-complex-vowel-constraints.cc; false)
emoji-table: gen-emoji-table.py emoji-data.txt
$(AM_V_GEN) $(builddir)/$^ > $(srcdir)/hb-unicode-emoji-table.hh \

View File

@ -142,6 +142,7 @@ HB_OT_sources = \
hb-ot-shape-complex-use.cc \
hb-ot-shape-complex-use.hh \
hb-ot-shape-complex-use-table.cc \
hb-ot-shape-complex-vowel-constraints.cc \
hb-ot-shape-complex-vowel-constraints.hh \
hb-ot-shape-complex.hh \
hb-ot-shape-normalize.hh \

View File

@ -2,12 +2,10 @@
"""Generator of the function to prohibit certain vowel sequences.
It creates ``preprocess_text_vowel_constraints``, which inserts dotted
It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
This function should be used as the ``preprocess_text`` of an
``hb_ot_complex_shaper_t``.
It also creates the helper function ``_output_with_dotted_circle``.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
@ -27,23 +25,9 @@ import io
import sys
if len (sys.argv) != 3:
print ('usage: ./gen-vowel-constraints.py use Scripts.txt', file=sys.stderr)
print ('usage: ./gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt', file=sys.stderr)
sys.exit (1)
try:
from html import unescape
def html_unescape (parser, entity):
return unescape (entity)
except ImportError:
def html_unescape (parser, entity):
return parser.unescape (entity)
def expect (condition, message=None):
if not condition:
if message is None:
raise AssertionError
raise AssertionError (message)
with io.open (sys.argv[2], encoding='utf-8') as f:
scripts_header = [f.readline () for i in range (2)]
scripts = {}
@ -142,74 +126,22 @@ class ConstraintSet (object):
s.append ('{}}}\n'.format (indent))
return ''.join (s)
class USESpecParser (HTMLParser):
"""A parser for the USE script development spec.
Attributes:
header (str): The ``updated_at`` timestamp of the spec.
constraints (Mapping[str, ConstraintSet]): A map of script names
to the scripts' prohibited sequences.
"""
def __init__ (self):
HTMLParser.__init__ (self)
self.header = ''
self.constraints = {}
# Whether the next <code> contains the vowel constraints.
self._primed = False
# Whether the parser is in the <code> element with the constraints.
self._in_constraints = False
# The text of the constraints.
self._constraints = ''
def handle_starttag (self, tag, attrs):
if tag == 'meta':
for attr, value in attrs:
if attr == 'name' and value == 'updated_at':
self.header = self.get_starttag_text ()
break
elif tag == 'a':
for attr, value in attrs:
if attr == 'id' and value == 'ivdvconstraints':
self._primed = True
break
elif self._primed and tag == 'code':
self._primed = False
self._in_constraints = True
def handle_endtag (self, tag):
self._in_constraints = False
def handle_data (self, data):
if self._in_constraints:
self._constraints += data
def handle_charref (self, name):
self.handle_data (html_unescape (self, '&#%s;' % name))
def handle_entityref (self, name):
self.handle_data (html_unescape (self, '&%s;' % name))
def parse (self, filename):
"""Parse the USE script development spec.
Args:
filename (str): The file name of the spec.
"""
with io.open (filename, encoding='utf-8') as f:
self.feed (f.read ())
expect (self.header, 'No header found')
for line in self._constraints.splitlines ():
constraint = [int (cp, 16) for cp in line.split (';')[0].strip ().split (' ')]
expect (2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint))
script = scripts[constraint[0]]
if script in self.constraints:
self.constraints[script].add (constraint)
else:
self.constraints[script] = ConstraintSet (constraint)
expect (self.constraints, 'No constraints found')
use_parser = USESpecParser ()
use_parser.parse (sys.argv[1])
constraints = {}
with io.open (sys.argv[1], encoding='utf-8') as f:
constraints_header = [f.readline ().strip () for i in range (2)]
for line in f:
j = line.find ('#')
if j >= 0:
line = line[:j]
constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
if not constraint: continue
assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
script = scripts[constraint[0]]
if script in constraints:
constraints[script].add (constraint)
else:
constraints[script] = ConstraintSet (constraint)
assert constraints, 'No constraints found'
print ('/* == Start of generated functions == */')
print ('/*')
@ -219,15 +151,15 @@ print (' * %s use Scripts.txt' % sys.argv[0])
print (' *')
print (' * on files with these headers:')
print (' *')
print (' * %s' % use_parser.header.strip ())
for line in constraints_header:
print (' * %s' % line.strip ())
print (' *')
for line in scripts_header:
print (' * %s' % line.strip ())
print (' */')
print ()
print ('#ifndef HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH')
print ('#define HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH')
print ('#include "hb-ot-shape-complex-vowel-constraints.hh"')
print ()
print ('static void')
print ('_output_with_dotted_circle (hb_buffer_t *buffer)')
print ('{')
@ -238,10 +170,10 @@ print (' buffer->next_glyph ();')
print ('}')
print ()
print ('static void')
print ('preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,')
print ('\t\t\t\t hb_buffer_t *buffer,')
print ('\t\t\t\t hb_font_t *font)')
print ('void')
print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,')
print ('\t\t\t\t hb_buffer_t *buffer,')
print ('\t\t\t\t hb_font_t *font)')
print ('{')
print (' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of')
print (' * vowel-sequences that look like another vowel. Data for each script')
@ -255,7 +187,7 @@ print (' unsigned int count = buffer->len;')
print (' switch ((unsigned) buffer->props.script)')
print (' {')
for script, constraints in sorted (use_parser.constraints.items (), key=lambda s_c: script_order[s_c[0]]):
for script, constraints in sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]]):
print (' case HB_SCRIPT_{}:'.format (script.upper ()))
print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
print (' {')
@ -280,7 +212,5 @@ print (' buffer->swap_buffers ();')
print (' }')
print ('}')
print ()
print ('#endif /* HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH */')
print ()
print ('/* == End of generated functions == */')

View File

@ -1517,6 +1517,14 @@ clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
}
/* Text-preprocessing callback of the Indic shaper (installed in
 * _hb_ot_complex_shaper_indic).  It does no work of its own: all three
 * arguments are forwarded unchanged to the shared, generated
 * _hb_preprocess_text_vowel_constraints (). */
static void
preprocess_text_indic (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
_hb_preprocess_text_vowel_constraints (plan, buffer, font);
}
static bool
decompose_indic (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t ab,
@ -1616,7 +1624,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
override_features_indic,
data_create_indic,
data_destroy_indic,
preprocess_text_vowel_constraints,
preprocess_text_indic,
nullptr, /* postprocess_glyphs */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
decompose_indic,

View File

@ -572,6 +572,15 @@ reorder (const hb_ot_shape_plan_t *plan,
HB_BUFFER_DEALLOCATE_VAR (buffer, use_category);
}
/* Text-preprocessing callback of the USE shaper (installed in
 * _hb_ot_complex_shaper_use).  It does no work of its own: all three
 * arguments are forwarded unchanged to the shared, generated
 * _hb_preprocess_text_vowel_constraints (). */
static void
preprocess_text_use (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
_hb_preprocess_text_vowel_constraints (plan, buffer, font);
}
static bool
compose_use (const hb_ot_shape_normalize_context_t *c,
hb_codepoint_t a,
@ -592,7 +601,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use =
nullptr, /* override_features */
data_create_use,
data_destroy_use,
preprocess_text_vowel_constraints,
preprocess_text_use,
nullptr, /* postprocess_glyphs */
HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
nullptr, /* decompose */

View File

@ -0,0 +1,433 @@
/* == Start of generated functions == */
/*
* The following functions are generated by running:
*
* ./gen-vowel-constraints.py HBIndicVowelConstraints.txt Scripts.txt
*
* on files with these headers:
*
* # Copied from https://docs.microsoft.com/en-us/typography/script-development/use
* # On October 23, 2018; with documentd dated 02/07/2018.
*
* # Scripts-11.0.0.txt
* # Date: 2018-02-21, 05:34:31 GMT
*/
#include "hb-ot-shape-complex-vowel-constraints.hh"
/* Emit U+25CC DOTTED CIRCLE into the output buffer, reset the continuation
 * state of that freshly emitted glyph, and then pass the current input glyph
 * through to the output. */
static void
_output_with_dotted_circle (hb_buffer_t *buffer)
{
  /* output_glyph () hands back the glyph info it just appended; reset its
   * continuation state in place instead of naming a temporary reference. */
  _hb_glyph_info_reset_continuation (&buffer->output_glyph (0x25CCu));

  buffer->next_glyph ();
}
void
_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
/* UGLY UGLY UGLY business of adding dotted-circle in the middle of
* vowel-sequences that look like another vowel. Data for each script
* collected from the USE script development spec.
*
* https://github.com/harfbuzz/harfbuzz/issues/1019
*/
bool processed = false;
buffer->clear_output ();
unsigned int count = buffer->len;
switch ((unsigned) buffer->props.script)
{
case HB_SCRIPT_DEVANAGARI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0905u:
switch (buffer->cur (1).codepoint)
{
case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
matched = true;
break;
}
break;
case 0x0906u:
switch (buffer->cur (1).codepoint)
{
case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
case 0x0948u:
matched = true;
break;
}
break;
case 0x0909u:
matched = 0x0941u == buffer->cur (1).codepoint;
break;
case 0x090Fu:
switch (buffer->cur (1).codepoint)
{
case 0x0945u: case 0x0946u: case 0x0947u:
matched = true;
break;
}
break;
case 0x0930u:
if (0x094Du == buffer->cur (1).codepoint &&
buffer->idx + 2 < count &&
0x0907u == buffer->cur (2).codepoint)
{
buffer->next_glyph ();
buffer->next_glyph ();
buffer->output_glyph (0x25CCu);
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_BENGALI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0985u:
matched = 0x09BEu == buffer->cur (1).codepoint;
break;
case 0x098Bu:
matched = 0x09C3u == buffer->cur (1).codepoint;
break;
case 0x098Cu:
matched = 0x09E2u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_GURMUKHI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0A05u:
switch (buffer->cur (1).codepoint)
{
case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
matched = true;
break;
}
break;
case 0x0A72u:
switch (buffer->cur (1).codepoint)
{
case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
matched = true;
break;
}
break;
case 0x0A73u:
switch (buffer->cur (1).codepoint)
{
case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_GUJARATI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0A85u:
switch (buffer->cur (1).codepoint)
{
case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
matched = true;
break;
}
break;
case 0x0AC5u:
matched = 0x0ABEu == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_ORIYA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0B05u:
matched = 0x0B3Eu == buffer->cur (1).codepoint;
break;
case 0x0B0Fu: case 0x0B13u:
matched = 0x0B57u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TELUGU:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0C12u:
switch (buffer->cur (1).codepoint)
{
case 0x0C4Cu: case 0x0C55u:
matched = true;
break;
}
break;
case 0x0C3Fu: case 0x0C46u: case 0x0C4Au:
matched = 0x0C55u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_KANNADA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0C89u: case 0x0C8Bu:
matched = 0x0CBEu == buffer->cur (1).codepoint;
break;
case 0x0C92u:
matched = 0x0CCCu == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_MALAYALAM:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0D07u: case 0x0D09u:
matched = 0x0D57u == buffer->cur (1).codepoint;
break;
case 0x0D0Eu:
matched = 0x0D46u == buffer->cur (1).codepoint;
break;
case 0x0D12u:
switch (buffer->cur (1).codepoint)
{
case 0x0D3Eu: case 0x0D57u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_SINHALA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0D85u:
switch (buffer->cur (1).codepoint)
{
case 0x0DCFu: case 0x0DD0u: case 0x0DD1u:
matched = true;
break;
}
break;
case 0x0D8Bu: case 0x0D8Fu: case 0x0D94u:
matched = 0x0DDFu == buffer->cur (1).codepoint;
break;
case 0x0D8Du:
matched = 0x0DD8u == buffer->cur (1).codepoint;
break;
case 0x0D91u:
switch (buffer->cur (1).codepoint)
{
case 0x0DCAu: case 0x0DD9u: case 0x0DDAu: case 0x0DDCu:
case 0x0DDDu:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_BRAHMI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11005u:
matched = 0x11038u == buffer->cur (1).codepoint;
break;
case 0x1100Bu:
matched = 0x1103Eu == buffer->cur (1).codepoint;
break;
case 0x1100Fu:
matched = 0x11042u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_KHUDAWADI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x112B0u:
switch (buffer->cur (1).codepoint)
{
case 0x112E0u: case 0x112E5u: case 0x112E6u: case 0x112E7u:
case 0x112E8u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TIRHUTA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11481u:
matched = 0x114B0u == buffer->cur (1).codepoint;
break;
case 0x1148Bu: case 0x1148Du:
matched = 0x114BAu == buffer->cur (1).codepoint;
break;
case 0x114AAu:
switch (buffer->cur (1).codepoint)
{
case 0x114B5u: case 0x114B6u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_MODI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11600u: case 0x11601u:
switch (buffer->cur (1).codepoint)
{
case 0x11639u: case 0x1163Au:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TAKRI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11680u:
switch (buffer->cur (1).codepoint)
{
case 0x116ADu: case 0x116B4u: case 0x116B5u:
matched = true;
break;
}
break;
case 0x11686u:
matched = 0x116B2u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
default:
break;
}
if (processed)
{
if (buffer->idx < count)
buffer->next_glyph ();
if (likely (buffer->successful))
buffer->swap_buffers ();
}
}
/* == End of generated functions == */

View File

@ -1,434 +1,39 @@
/* == Start of generated functions == */
/*
* The following functions are generated by running:
* Copyright © 2018 Google, Inc.
*
* ./gen-vowel-constraints.py use Scripts.txt
* This is part of HarfBuzz, a text shaping library.
*
* on files with these headers:
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* <meta name="updated_at" content="2018-03-27 12:21 AM" />
* # Scripts-11.0.0.txt
* # Date: 2018-02-21, 05:34:31 GMT
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH
#define HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH
static void
_output_with_dotted_circle (hb_buffer_t *buffer)
{
hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);
_hb_glyph_info_reset_continuation (&dottedcircle);
#include "hb.hh"
buffer->next_glyph ();
}
#include "hb-ot-shape-complex.hh"
static void
preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font)
{
/* UGLY UGLY UGLY business of adding dotted-circle in the middle of
* vowel-sequences that look like another vowel. Data for each script
* collected from the USE script development spec.
*
* https://github.com/harfbuzz/harfbuzz/issues/1019
*/
bool processed = false;
buffer->clear_output ();
unsigned int count = buffer->len;
switch ((unsigned) buffer->props.script)
{
case HB_SCRIPT_DEVANAGARI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0905u:
switch (buffer->cur (1).codepoint)
{
case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u:
case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu:
case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u:
matched = true;
break;
}
break;
case 0x0906u:
switch (buffer->cur (1).codepoint)
{
case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u:
case 0x0948u:
matched = true;
break;
}
break;
case 0x0909u:
matched = 0x0941u == buffer->cur (1).codepoint;
break;
case 0x090Fu:
switch (buffer->cur (1).codepoint)
{
case 0x0945u: case 0x0946u: case 0x0947u:
matched = true;
break;
}
break;
case 0x0930u:
if (0x094Du == buffer->cur (1).codepoint &&
buffer->idx + 2 < count &&
0x0907u == buffer->cur (2).codepoint)
{
buffer->next_glyph ();
buffer->next_glyph ();
buffer->output_glyph (0x25CCu);
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_BENGALI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0985u:
matched = 0x09BEu == buffer->cur (1).codepoint;
break;
case 0x098Bu:
matched = 0x09C3u == buffer->cur (1).codepoint;
break;
case 0x098Cu:
matched = 0x09E2u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_GURMUKHI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0A05u:
switch (buffer->cur (1).codepoint)
{
case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu:
matched = true;
break;
}
break;
case 0x0A72u:
switch (buffer->cur (1).codepoint)
{
case 0x0A3Fu: case 0x0A40u: case 0x0A47u:
matched = true;
break;
}
break;
case 0x0A73u:
switch (buffer->cur (1).codepoint)
{
case 0x0A41u: case 0x0A42u: case 0x0A4Bu:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_GUJARATI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0A85u:
switch (buffer->cur (1).codepoint)
{
case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u:
case 0x0AC9u: case 0x0ACBu: case 0x0ACCu:
matched = true;
break;
}
break;
case 0x0AC5u:
matched = 0x0ABEu == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_ORIYA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0B05u:
matched = 0x0B3Eu == buffer->cur (1).codepoint;
break;
case 0x0B0Fu: case 0x0B13u:
matched = 0x0B57u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TELUGU:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0C12u:
switch (buffer->cur (1).codepoint)
{
case 0x0C4Cu: case 0x0C55u:
matched = true;
break;
}
break;
case 0x0C3Fu: case 0x0C46u: case 0x0C4Au:
matched = 0x0C55u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_KANNADA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0C89u: case 0x0C8Bu:
matched = 0x0CBEu == buffer->cur (1).codepoint;
break;
case 0x0C92u:
matched = 0x0CCCu == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_MALAYALAM:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0D07u: case 0x0D09u:
matched = 0x0D57u == buffer->cur (1).codepoint;
break;
case 0x0D0Eu:
matched = 0x0D46u == buffer->cur (1).codepoint;
break;
case 0x0D12u:
switch (buffer->cur (1).codepoint)
{
case 0x0D3Eu: case 0x0D57u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_SINHALA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x0D85u:
switch (buffer->cur (1).codepoint)
{
case 0x0DCFu: case 0x0DD0u: case 0x0DD1u:
matched = true;
break;
}
break;
case 0x0D8Bu: case 0x0D8Fu: case 0x0D94u:
matched = 0x0DDFu == buffer->cur (1).codepoint;
break;
case 0x0D8Du:
matched = 0x0DD8u == buffer->cur (1).codepoint;
break;
case 0x0D91u:
switch (buffer->cur (1).codepoint)
{
case 0x0DCAu: case 0x0DD9u: case 0x0DDAu: case 0x0DDCu:
case 0x0DDDu:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_BRAHMI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11005u:
matched = 0x11038u == buffer->cur (1).codepoint;
break;
case 0x1100Bu:
matched = 0x1103Eu == buffer->cur (1).codepoint;
break;
case 0x1100Fu:
matched = 0x11042u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_KHUDAWADI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x112B0u:
switch (buffer->cur (1).codepoint)
{
case 0x112E0u: case 0x112E5u: case 0x112E6u: case 0x112E7u:
case 0x112E8u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TIRHUTA:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11481u:
matched = 0x114B0u == buffer->cur (1).codepoint;
break;
case 0x1148Bu: case 0x1148Du:
matched = 0x114BAu == buffer->cur (1).codepoint;
break;
case 0x114AAu:
switch (buffer->cur (1).codepoint)
{
case 0x114B5u: case 0x114B6u:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_MODI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11600u: case 0x11601u:
switch (buffer->cur (1).codepoint)
{
case 0x11639u: case 0x1163Au:
matched = true;
break;
}
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
case HB_SCRIPT_TAKRI:
for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)
{
bool matched = false;
switch (buffer->cur ().codepoint)
{
case 0x11680u:
switch (buffer->cur (1).codepoint)
{
case 0x116ADu: case 0x116B4u: case 0x116B5u:
matched = true;
break;
}
break;
case 0x11686u:
matched = 0x116B2u == buffer->cur (1).codepoint;
break;
}
buffer->next_glyph ();
if (matched) _output_with_dotted_circle (buffer);
}
processed = true;
break;
default:
break;
}
if (processed)
{
if (buffer->idx < count)
buffer->next_glyph ();
if (likely (buffer->successful))
buffer->swap_buffers ();
}
}
/* Shared text-preprocessing step that inserts dotted circles into vowel
 * sequences prohibited by the USE script development spec.  The definition
 * is generated by gen-vowel-constraints.py into
 * hb-ot-shape-complex-vowel-constraints.cc; it is meant to be called from a
 * shaper's preprocess_text callback. */
HB_INTERNAL void
_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan,
hb_buffer_t *buffer,
hb_font_t *font);
#endif /* HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH */
/* == End of generated functions == */