From ddf87ffb22d30d6b15083f1f6d5d4fe20417b538 Mon Sep 17 00:00:00 2001 From: Khaled Hosny Date: Thu, 29 Jul 2021 01:12:46 +0200 Subject: [PATCH] [test] Add generated tests for emoji clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/harfbuzz/harfbuzz/issues/3017 Uses AdobeBlank2.ttf from: https://github.com/adobe-fonts/adobe-blank-2 instead of a dummy empty font so that everything maps to GID 1 and control code points are kept instead of being dropped because there is no space glyph (otherwise we’d need to identify control code points somehow when generating the expectations). --- src/gen-emoji-table.py | 39 ++++++++++- test/shaping/data/in-house/Makefile.sources | 1 + .../data/in-house/fonts/AdobeBlank2.ttf | Bin 0 -> 2360 bytes test/shaping/data/in-house/meson.build | 1 + .../data/in-house/tests/emoji-clusters.tests | 66 ++++++++++++++++++ 5 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 test/shaping/data/in-house/fonts/AdobeBlank2.ttf create mode 100644 test/shaping/data/in-house/tests/emoji-clusters.tests diff --git a/src/gen-emoji-table.py b/src/gen-emoji-table.py index 1bdd402f8..dc69ba673 100755 --- a/src/gen-emoji-table.py +++ b/src/gen-emoji-table.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 -"""usage: ./gen-emoji-table.py emoji-data.txt +"""usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt Input file: * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt +* https://www.unicode.org/Public/emoji/latest/emoji-test.txt """ import sys from collections import OrderedDict import packTab -if len (sys.argv) != 2: +if len (sys.argv) != 3: sys.exit (__doc__) f = open(sys.argv[1]) @@ -74,3 +75,37 @@ print () print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */") print () print ("/* == End of generated table == */") + + +# Generate test file. 
+sequences = [] +with open(sys.argv[2]) as f: + for line in f.readlines(): + if "#" in line: + line = line[:line.index("#")] + if ";" in line: + line = line[:line.index(";")] + line = line.strip() + if not line: + continue + line = line.split(" ") + if len(line) == 1: + continue + sequences.append(line) + +# Split into number of sequences per line, too small number slows the test, and +# too big overwhelms the test runner. +CHUNK = 50 +with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as f: + for i in range(0, len(sequences), CHUNK): + outputs = [] + inputs = [] + cluster = 0 + for sequence in sequences[i:i + CHUNK]: + outputs.append("|".join(f"1={cluster}" for c in sequence)) + inputs.append(",".join(sequence)) + cluster += len(sequence) + + f.write("../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot") + f.write(":" + ",".join(inputs)) + f.write(":[" + "|".join(outputs) + "]\n") diff --git a/test/shaping/data/in-house/Makefile.sources b/test/shaping/data/in-house/Makefile.sources index a9424ddc8..13066c7ae 100644 --- a/test/shaping/data/in-house/Makefile.sources +++ b/test/shaping/data/in-house/Makefile.sources @@ -16,6 +16,7 @@ TESTS = \ tests/default-ignorables.tests \ tests/digits.tests \ tests/emoji.tests \ + tests/emoji-clusters.tests \ tests/fallback-positioning.tests \ tests/hangul-jamo.tests \ tests/hyphens.tests \ diff --git a/test/shaping/data/in-house/fonts/AdobeBlank2.ttf b/test/shaping/data/in-house/fonts/AdobeBlank2.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8a4fb0031d230da4a82fb153a8c98d1f56c2eeef GIT binary patch literal 2360 zcmbVNO=w(I6#njelgUi}CTSt52H~ZoXk;=;#w5lX&5yBmXmCm<{-U;%`3;?!yv|J0 z6oR-BNmsfL z_PqFK-CXg*N56O#uxLjy!cr=UK0dHtqwX&zSKUB2_R;<_b*GdpW-74{j?;b( zXgKUHRjMD2E`HCWo3szO<%~OV`hlltKT5qxDusoq?CG(s6QS|nVe@SqKYaY%1k|AM>Po15y0E z<0SHr`jz^hzt-!a4NQnWH8UN@Bv7xL99*oP98h(!nc?Wn$eSLDnKu!&pjGR_g^c~4 z_)$)>q791L4G-7uFn&#A;Yb-Q^;-tp#J0gwo6lgWx4~dPvEN|c0yG*dBm@i&5(f>I 
zESn5&CT=#kg}B9FsiD>2HsUse$t^+#w-dK}crw)1+S1$<3^e*1e72 z_k=86T-&>9+-UFG)axz1-jub*xygBp^JaheyZ-RE{b5WtkrkyZ%eK)u!O_`&br9E7 z?}4}9d59EUQ0wGRs@J!bU0yS{$c7(5c7oaN@961J9qMt_)77CGRnMArr#7yJy0>pw z3tYO@Q0)`j-8x|3-YVKpZzm6_OMP3{)u)HHUuR&BW}Ihdl}{LakJG~n4^s26g*IF< z*v1&v4fgHvX+Ra5X5Wual{YwmVfCiL&BR|B+-kkAelxfY!@e~)Bg|6re3EBoFpaoIJ5hJfMvX_D zKF{JsMkzDrMdn%3ad+b!XI=89-JLwWsye?CR%jd2wg@{i#;8SRcZ%`ECre9#Q5W?2 z5o)6t(fLL9)U-FN|HG&B=)ZAUWrtkmHh_6{U{QDJh^|?>oFK|F(iv7zrk7(pq=FRn z68$smkVC#pl){-q|Gf51u-~)PPoGGtA0kM|jF$mCOjQ70tVR5yEhhyTY(!F7+pfg!}vF)nWI% zs5@sXx)&)%&vK?1)%$kji%Igk$>==gMwFR{+=rcSph92aPr5DHiM7g|Dy6V1_8ifQ zK3n2$iMqr4wZzy`*)n~k4!KiaHUD