#!/usr/bin/env python3

"""usage: ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt

Input files:
* https://unicode.org/Public/UCD/latest/ucd/ArabicShaping.txt
* https://unicode.org/Public/UCD/latest/ucd/Scripts.txt
"""

import sys

# Everything in ArabicShaping.txt up to and including the first line that
# contains this run of '#' is skipped before data rows are parsed.
PREAMBLE_END = '##################'

# Script values that are filtered out and never get a `case` label.
SHARED_SCRIPTS = frozenset (('Common', 'Inherited'))


def _records (f):
    """Yield `(start, end, fields)` for every data row of `f`.

    `f` is an iterable of text lines in which fields are separated by `;`
    and a `#` starts a comment running to the end of the line.  The first
    field is either one hexadecimal code point or an inclusive
    `XXXX..YYYY` range; `start` and `end` are its integer bounds (equal
    for a single code point).  `fields` holds every field, stripped.

    Raises ValueError if the first field of a data row is not hexadecimal.
    """
    for line in f:
        fields = [x.strip () for x in line.partition ('#')[0].split (';')]
        if len (fields) == 1:
            # No `;` left after dropping the comment: blank or comment-only.
            continue

        first, _, last = fields[0].partition ('..')
        start = int (first, 16)
        end = int (last, 16) if last else start
        yield start, end, fields


def read (f):
    """Return a dict mapping each code point listed in `f` to its value.

    The value is the second `;`-separated field of the row; a range row
    assigns the same value to every code point in the range.  When a code
    point appears in more than one row, the last row wins.
    """
    mapping = {}
    for start, end, fields in _records (f):
        value = fields[1]
        for u in range (start, end + 1):
            mapping[u] = value
    return mapping


def read_joining_uu (f):
    """Return the sorted list of distinct code points that have a row in `f`.

    Only the first field of each row is used.  The caller is expected to
    have positioned `f` after any preamble it wants ignored.
    """
    values = set ()
    for start, end, _ in _records (f):
        values.update (range (start, end + 1))
    return sorted (values)


def print_has_arabic_joining (scripts, joining_uu):
    """Print the C function `has_arabic_joining` to standard output.

    `scripts` maps code point -> script name and `joining_uu` is an
    iterable of code points.  One `case HB_SCRIPT_<NAME>:` label is
    emitted, in sorted order, for every distinct script of those code
    points other than 'Common' and 'Inherited'.

    Raises KeyError if a code point in `joining_uu` is missing from
    `scripts`.
    """
    names = sorted ({scripts[u] for u in joining_uu} - SHARED_SCRIPTS)

    print ("static bool")
    print ("has_arabic_joining (hb_script_t script)")
    print ("{")
    print ("  /* List of scripts that have data in arabic-table. */")
    print ("  switch ((int) script)")
    print ("  {")

    for name in names:
        print ("    case HB_SCRIPT_{}:".format (name.upper ()))

    print ("      return true;")
    print ()
    print ("    default:")
    print ("      return false;")
    print ("  }")
    print ("}")
    print ()


def _skip_preamble (f):
    """Consume `f` through the first line containing PREAMBLE_END.

    Returns True if such a line was found, False if `f` ran out first.
    """
    for line in f:
        if PREAMBLE_END in line:
            return True
    return False


def main (argv):
    """Generate the header from `argv` = [program, ArabicShaping, Scripts].

    Exits with the usage text when the argument count is wrong, and with
    an error message when ArabicShaping.txt has no separator row.  Both
    inputs are fully parsed before anything is printed, so a failure does
    not leave partial output behind.
    """
    if len (argv) != 3:
        sys.exit (__doc__)

    with open (argv[1], encoding='utf-8') as shaping, \
         open (argv[2], encoding='utf-8') as scripts_file:
        # The first two lines of each input (file name and date in the
        # UCD files) are echoed into the generated comment.
        headers = [[f.readline (), f.readline ()]
                   for f in (shaping, scripts_file)]

        # readline () keeps returning '' at end of file, so looping on it
        # until the separator shows up would never terminate when the
        # separator is absent.  Iterate instead and fail loudly.
        if not _skip_preamble (shaping):
            sys.exit ("%s: no line containing '%s' found"
                      % (argv[1], PREAMBLE_END))

        joining_uu = read_joining_uu (shaping)
        scripts = read (scripts_file)

    print ("/* == Start of generated function == */")
    print ("/*")
    print (" * The following function is generated by running:")
    print (" *")
    print (" *   ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt")
    print (" *")
    print (" * on files with these headers:")
    print (" *")
    for h in headers:
        for l in h:
            print (" * %s" % (l.strip ()))
    print (" */")
    print ()
    print ("#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH")
    print ("#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH")
    print ()

    print_has_arabic_joining (scripts, joining_uu)

    print ()
    print ("#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */")
    print ()
    print ("/* == End of generated function == */")


if __name__ == '__main__':
    main (sys.argv)
--git a/src/hb-ot-shape-complex-arabic-joining-list.hh b/src/hb-ot-shape-complex-arabic-joining-list.hh new file mode 100644 index 000000000..140310333 --- /dev/null +++ b/src/hb-ot-shape-complex-arabic-joining-list.hh @@ -0,0 +1,47 @@ +/* == Start of generated function == */ +/* + * The following function is generated by running: + * + * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt + * + * on files with these headers: + * + * # ArabicShaping-13.0.0.txt + * # Date: 2020-01-31, 23:55:00 GMT [KW, RP] + * # Scripts-13.0.0.txt + * # Date: 2020-01-22, 00:07:43 GMT + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH +#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH + +static bool +has_arabic_joining (hb_script_t script) +{ + /* List of scripts that have data in arabic-table. */ + switch ((int) script) + { + case HB_SCRIPT_ADLAM: + case HB_SCRIPT_ARABIC: + case HB_SCRIPT_CHORASMIAN: + case HB_SCRIPT_HANIFI_ROHINGYA: + case HB_SCRIPT_KAITHI: + case HB_SCRIPT_MANDAIC: + case HB_SCRIPT_MANICHAEAN: + case HB_SCRIPT_MONGOLIAN: + case HB_SCRIPT_NKO: + case HB_SCRIPT_PHAGS_PA: + case HB_SCRIPT_PSALTER_PAHLAVI: + case HB_SCRIPT_SOGDIAN: + case HB_SCRIPT_SYRIAC: + return true; + + default: + return false; + } +} + + +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */ + +/* == End of generated function == */ diff --git a/src/hb-ot-shape-complex-use.cc b/src/hb-ot-shape-complex-use.cc index 10f5822c0..981df2d59 100644 --- a/src/hb-ot-shape-complex-use.cc +++ b/src/hb-ot-shape-complex-use.cc @@ -32,6 +32,7 @@ #include "hb-ot-shape-complex-use.hh" #include "hb-ot-shape-complex-arabic.hh" +#include "hb-ot-shape-complex-arabic-joining-list.hh" #include "hb-ot-shape-complex-vowel-constraints.hh" /* buffer var allocations */ @@ -152,40 +153,6 @@ struct use_shape_plan_t arabic_shape_plan_t *arabic_plan; }; -static bool -has_arabic_joining (hb_script_t script) -{ - /* List of scripts that have data in arabic-table. 
*/ - switch ((int) script) - { - /* Unicode-1.1 additions */ - case HB_SCRIPT_ARABIC: - - /* Unicode-3.0 additions */ - case HB_SCRIPT_MONGOLIAN: - case HB_SCRIPT_SYRIAC: - - /* Unicode-5.0 additions */ - case HB_SCRIPT_NKO: - case HB_SCRIPT_PHAGS_PA: - - /* Unicode-6.0 additions */ - case HB_SCRIPT_MANDAIC: - - /* Unicode-7.0 additions */ - case HB_SCRIPT_MANICHAEAN: - case HB_SCRIPT_PSALTER_PAHLAVI: - - /* Unicode-9.0 additions */ - case HB_SCRIPT_ADLAM: - - return true; - - default: - return false; - } -} - static void * data_create_use (const hb_ot_shape_plan_t *plan) { diff --git a/src/meson.build b/src/meson.build index 853e0eb10..a4abfb726 100644 --- a/src/meson.build +++ b/src/meson.build @@ -104,6 +104,7 @@ hb_base_sources = files( 'hb-ot-post-macroman.hh', 'hb-ot-post-table.hh', 'hb-ot-shape-complex-arabic-fallback.hh', + 'hb-ot-shape-complex-arabic-joining-list.hh', 'hb-ot-shape-complex-arabic-table.hh', 'hb-ot-shape-complex-arabic-win1256.hh', 'hb-ot-shape-complex-arabic.cc', diff --git a/src/update-unicode-tables.make b/src/update-unicode-tables.make index c2d879a39..93b1ccaab 100755 --- a/src/update-unicode-tables.make +++ b/src/update-unicode-tables.make @@ -1,6 +1,7 @@ #!/usr/bin/env -S make -f all: packtab \ + hb-ot-shape-complex-arabic-joining-list.hh \ hb-ot-shape-complex-arabic-table.hh hb-unicode-emoji-table.hh \ hb-ot-shape-complex-indic-table.cc hb-ot-tag-table.hh \ hb-ucd-table.hh hb-ot-shape-complex-use-table.cc \ @@ -8,6 +9,8 @@ all: packtab \ .PHONY: all clean packtab +hb-ot-shape-complex-arabic-joining-list.hh: gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt + ./$^ > $@ || ($(RM) $@; false) hb-ot-shape-complex-arabic-table.hh: gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt ./$^ > $@ || ($(RM) $@; false) hb-unicode-emoji-table.hh: gen-emoji-table.py emoji-data.txt