[test] Add generated tests for emoji clusters

Fixes https://github.com/harfbuzz/harfbuzz/issues/3017. Uses AdobeBlank2.ttf (from https://github.com/adobe-fonts/adobe-blank-2) instead of a dummy empty font, so that everything maps to GID 1 and control code points are kept rather than being dropped because there is no space glyph (otherwise we’d need to identify control code points somehow when generating the expectations).
2021-07-29 01:12:46 +02:00 · 2021-07-29 01:12:46 +02:00 · ddf87ffb22
parent f0a1892ff9
commit ddf87ffb22
5 changed files with 105 additions and 2 deletions
--- a/src/gen-emoji-table.py
+++ b/src/gen-emoji-table.py
@ -1,16 +1,17 @@
 #!/usr/bin/env python3

-"""usage: ./gen-emoji-table.py emoji-data.txt
+"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt

 Input file:
 * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
+* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
 """

 import sys
 from collections import OrderedDict
 import packTab

-if len (sys.argv) != 2:
+if len (sys.argv) != 3:
 	sys.exit (__doc__)

 f = open(sys.argv[1])
@ -74,3 +75,37 @@ print ()
 print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
 print ()
 print ("/* == End of generated table == */")
+
+
+# Generate test file.
+sequences = []
+with open(sys.argv[2]) as f:
+    for line in f.readlines():
+        if "#" in line:
+            line = line[:line.index("#")]
+        if ";" in line:
+            line = line[:line.index(";")]
+        line = line.strip()
+        if not line:
+            continue
+        line = line.split(" ")
+        if len(line) == 1:
+            continue
+        sequences.append(line)
+
+# Write CHUNK sequences per test line: too small a number slows the test, and
+# too big a number overwhelms the test runner.
+CHUNK = 50
+with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as f:
+    for i in range(0, len(sequences), CHUNK):
+        outputs = []
+        inputs = []
+        cluster = 0
+        for sequence in sequences[i:i + CHUNK]:
+            outputs.append("|".join(f"1={cluster}" for c in sequence))
+            inputs.append(",".join(sequence))
+            cluster += len(sequence)
+
+        f.write("../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot")
+        f.write(":" + ",".join(inputs))
+        f.write(":[" + "|".join(outputs) + "]\n")
--- a/test/shaping/data/in-house/Makefile.sources
+++ b/test/shaping/data/in-house/Makefile.sources
@ -16,6 +16,7 @@ TESTS = \
 	tests/default-ignorables.tests \
 	tests/digits.tests \
 	tests/emoji.tests \
+	tests/emoji-clusters.tests \
 	tests/fallback-positioning.tests \
 	tests/hangul-jamo.tests \
 	tests/hyphens.tests \
--- a/test/shaping/data/in-house/fonts/AdobeBlank2.ttf
+++ b/test/shaping/data/in-house/fonts/AdobeBlank2.ttf
--- a/test/shaping/data/in-house/meson.build
+++ b/test/shaping/data/in-house/meson.build
@ -16,6 +16,7 @@ in_house_tests = [
  'default-ignorables.tests',
  'digits.tests',
  'emoji.tests',
+  'emoji-clusters.tests',
  'fallback-positioning.tests',
  'hangul-jamo.tests',
  'hyphens.tests',
--- a/test/shaping/data/in-house/tests/emoji-clusters.tests
+++ b/test/shaping/data/in-house/tests/emoji-clusters.tests