Generate `has_arabic_joining`

This commit is contained in:
David Corbett 2020-08-11 19:24:46 -04:00 committed by ebraminio
parent 6f754852c1
commit 7bffb5d7ba
7 changed files with 158 additions and 34 deletions

View File

@ -248,6 +248,7 @@ harfbuzz-deprecated-symbols.txt: $(srcdir)/hb-deprecated.h
GENERATORS = \
gen-arabic-joining-list.py \
gen-arabic-table.py \
gen-def.py \
gen-emoji-table.py \

View File

@ -105,6 +105,7 @@ HB_BASE_sources = \
hb-ot-post-macroman.hh \
hb-ot-post-table.hh \
hb-ot-shape-complex-arabic-fallback.hh \
hb-ot-shape-complex-arabic-joining-list.hh \
hb-ot-shape-complex-arabic-table.hh \
hb-ot-shape-complex-arabic-win1256.hh \
hb-ot-shape-complex-arabic.cc \

104
src/gen-arabic-joining-list.py Executable file
View File

@ -0,0 +1,104 @@
#!/usr/bin/env python3
"""usage: ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt
Input files:
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""

import os.path, sys

# The module docstring doubles as the usage message shown on a bad
# command line, so it must name this script correctly.
if len (sys.argv) != 3:
    sys.exit (__doc__)

# files[0] is ArabicShaping.txt, files[1] is Scripts.txt (argument order).
files = [open (x, encoding='utf-8') for x in sys.argv[1:]]

# Remember the first two lines of each input; they are echoed into the
# banner of the generated header.
headers = [[f.readline (), f.readline ()] for f in files]

# Advance the first input past the first line containing a long run of
# '#' characters.  readline() returns '' at end of file, so stop with an
# error there instead of looping forever on an input that has no such line.
while True:
    line = files[0].readline ()
    if not line:
        sys.exit ('%s: no "##################" separator line found' % sys.argv[1])
    if '##################' in line:
        break
def read (f):
    """Parse a semicolon-separated data file into a dict.

    Each data line has the form ``START[..END] ; VALUE [; ...]`` where
    START and END are hexadecimal code points.  Anything from the first
    '#' to the end of a line is ignored, and lines that contain no ';'
    after that are skipped.

    Returns a dict mapping every code point in each (inclusive) range to
    the stripped text of the second field.  Later lines override earlier
    ones for the same code point.
    """
    mapping = {}
    for raw in f:
        # Cut off a trailing comment, if any.
        content = raw.split ('#', 1)[0]
        fields = [field.strip () for field in content.split (';')]
        if len (fields) == 1:
            # Blank or comment-only line.
            continue

        bounds = fields[0].split ('..')
        first = int (bounds[0], 16)
        last = int (bounds[1], 16) if len (bounds) > 1 else first

        value = fields[1]
        mapping.update (dict.fromkeys (range (first, last + 1), value))
    return mapping
def read_joining_uu (f):
    """Collect the code points in ArabicShaping.txt that take part in joining.

    Each data line is ``code point; name; joining type; joining group``.
    Lines starting with '#' and lines without any ';' are ignored.

    Entries whose joining type is ``U`` (non-joining) or ``T``
    (transparent) are skipped: such characters do not join, so a script
    that only has entries of those types should not be reported as a
    joining script.  Lines with fewer than three fields carry no joining
    type and are kept.

    Returns the sorted list of distinct code points (as ints).
    """
    non_joining_types = {'T', 'U'}
    values = set ()
    for line in f:
        if line.startswith ('#'):
            continue
        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue
        if len (fields) > 2 and fields[2] in non_joining_types:
            continue
        values.add (int (fields[0], 16))
    return sorted (values)
def print_has_arabic_joining (scripts, joining_uu):
    """Write the C definition of has_arabic_joining() to standard output.

    scripts     -- dict mapping a code point to its script name.
    joining_uu  -- iterable of code points to look up in `scripts`.

    One ``case HB_SCRIPT_<NAME>:`` label is emitted, in sorted order, for
    every distinct script among `joining_uu`, except 'Common' and
    'Inherited'.  A code point missing from `scripts` raises KeyError.
    """
    print ("\n".join ([
        "static bool",
        "has_arabic_joining (hb_script_t script)",
        "{",
        " /* List of scripts that have data in arabic-table. */",
        " switch ((int) script)",
        " {",
    ]))

    # Scripts that are shared between writing systems get no case label.
    ignored = {'Common', 'Inherited'}
    names = set ()
    for cp in joining_uu:
        name = scripts[cp]
        if name not in ignored:
            names.add (name)

    for name in sorted (names):
        print (" case HB_SCRIPT_%s:" % name.upper ())

    print ("\n".join ([
        " return true;",
        "",
        " default:",
        " return false;",
        " }",
        "}",
    ]))
# Banner identifying the generator and, below it, the input files used.
print ("\n".join ([
    "",
    "/* == Start of generated function == */",
    "/*",
    " * The following function is generated by running:",
    " *",
    " * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt",
    " *",
    " * on files with these headers:",
    " *",
]))

# Echo the two leading lines saved from each input file.
for header_lines in headers:
    for header_line in header_lines:
        print (" * {}".format (header_line.strip ()))

# Close the banner comment and open the include guard.
print ("\n".join ([
    " */",
    "",
    "#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH",
    "#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH",
    "",
]))

# Scripts.txt is parsed first, then ArabicShaping.txt.
scripts = read (files[1])
joining_uu = read_joining_uu (files[0])
print_has_arabic_joining (scripts, joining_uu)

# Close the include guard and the generated section.
print ("\n".join ([
    "",
    "#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */",
    "",
    "/* == End of generated function == */",
]))

View File

@ -0,0 +1,47 @@
/* == Start of generated function == */
/*
* The following function is generated by running:
*
* ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt
*
* on files with these headers:
*
* # ArabicShaping-13.0.0.txt
* # Date: 2020-01-31, 23:55:00 GMT [KW, RP]
* # Scripts-13.0.0.txt
* # Date: 2020-01-22, 00:07:43 GMT
*/
#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH
#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH
/* Reports whether SCRIPT is one of the scripts enumerated in the switch
 * below: returns true for each listed HB_SCRIPT_* value and false for
 * every other value.
 *
 * This definition sits inside the generated section of this header (see
 * the banner at the top of the file naming gen-arabic-joining-list.py),
 * so changes belong in the generator rather than here. */
static bool
has_arabic_joining (hb_script_t script)
{
/* List of scripts that have data in arabic-table. */
/* NOTE(review): the cast to int presumably keeps compilers from warning
 * about enumerators not handled by the switch -- confirm. */
switch ((int) script)
{
case HB_SCRIPT_ADLAM:
case HB_SCRIPT_ARABIC:
case HB_SCRIPT_CHORASMIAN:
case HB_SCRIPT_HANIFI_ROHINGYA:
case HB_SCRIPT_KAITHI:
case HB_SCRIPT_MANDAIC:
case HB_SCRIPT_MANICHAEAN:
case HB_SCRIPT_MONGOLIAN:
case HB_SCRIPT_NKO:
case HB_SCRIPT_PHAGS_PA:
case HB_SCRIPT_PSALTER_PAHLAVI:
case HB_SCRIPT_SOGDIAN:
case HB_SCRIPT_SYRIAC:
/* All labels above fall through to this single return. */
return true;
default:
return false;
}
}
#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */
/* == End of generated function == */

View File

@ -32,6 +32,7 @@
#include "hb-ot-shape-complex-use.hh"
#include "hb-ot-shape-complex-arabic.hh"
#include "hb-ot-shape-complex-arabic-joining-list.hh"
#include "hb-ot-shape-complex-vowel-constraints.hh"
/* buffer var allocations */
@ -152,40 +153,6 @@ struct use_shape_plan_t
arabic_shape_plan_t *arabic_plan;
};
/* Tells whether SCRIPT is one of the scripts that have data in
 * arabic-table.  Returns true for the scripts listed below and false for
 * anything else.  The trailing note on each label records the Unicode
 * version in which the script was added. */
static bool
has_arabic_joining (hb_script_t script)
{
  switch ((int) script)
  {
    case HB_SCRIPT_ADLAM:            /* Unicode-9.0 */
    case HB_SCRIPT_ARABIC:           /* Unicode-1.1 */
    case HB_SCRIPT_MANDAIC:          /* Unicode-6.0 */
    case HB_SCRIPT_MANICHAEAN:       /* Unicode-7.0 */
    case HB_SCRIPT_MONGOLIAN:        /* Unicode-3.0 */
    case HB_SCRIPT_NKO:              /* Unicode-5.0 */
    case HB_SCRIPT_PHAGS_PA:         /* Unicode-5.0 */
    case HB_SCRIPT_PSALTER_PAHLAVI:  /* Unicode-7.0 */
    case HB_SCRIPT_SYRIAC:           /* Unicode-3.0 */
      return true;

    default:
      return false;
  }
}
static void *
data_create_use (const hb_ot_shape_plan_t *plan)
{

View File

@ -104,6 +104,7 @@ hb_base_sources = files(
'hb-ot-post-macroman.hh',
'hb-ot-post-table.hh',
'hb-ot-shape-complex-arabic-fallback.hh',
'hb-ot-shape-complex-arabic-joining-list.hh',
'hb-ot-shape-complex-arabic-table.hh',
'hb-ot-shape-complex-arabic-win1256.hh',
'hb-ot-shape-complex-arabic.cc',

View File

@ -1,6 +1,7 @@
#!/usr/bin/env -S make -f
all: packtab \
hb-ot-shape-complex-arabic-joining-list.hh \
hb-ot-shape-complex-arabic-table.hh hb-unicode-emoji-table.hh \
hb-ot-shape-complex-indic-table.cc hb-ot-tag-table.hh \
hb-ucd-table.hh hb-ot-shape-complex-use-table.cc \
@ -8,6 +9,8 @@ all: packtab \
.PHONY: all clean packtab
# Regenerate the joining-list header.  $^ expands to all prerequisites, so
# the recipe runs ./gen-arabic-joining-list.py with ArabicShaping.txt and
# Scripts.txt as arguments and redirects its output into the target.  If
# the script fails, the partially written target is removed and the recipe
# still reports failure.
hb-ot-shape-complex-arabic-joining-list.hh: gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt
./$^ > $@ || ($(RM) $@; false)
# Regenerate the Arabic table header.  $^ expands to all prerequisites, so
# the recipe runs ./gen-arabic-table.py with ArabicShaping.txt,
# UnicodeData.txt and Blocks.txt as arguments and redirects its output into
# the target.  If the script fails, the partially written target is removed
# and the recipe still reports failure.
hb-ot-shape-complex-arabic-table.hh: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt
./$^ > $@ || ($(RM) $@; false)
hb-unicode-emoji-table.hh: gen-emoji-table.py emoji-data.txt