[test] Add generated tests for emoji clusters

Fixes https://github.com/harfbuzz/harfbuzz/issues/3017

Uses AdobeBlank2.ttf from:

  https://github.com/adobe-fonts/adobe-blank-2

instead of a dummy empty font so that everything maps to GID 1 and
control code points are kept instead of being dropped because there is
no space glyph (otherwise we’d need to identify control code points
somehow when generating the expectations).
This commit is contained in:
Khaled Hosny 2021-07-29 01:12:46 +02:00 committed by Behdad Esfahbod
parent f0a1892ff9
commit ddf87ffb22
5 changed files with 105 additions and 2 deletions

View File

@ -1,16 +1,17 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""usage: ./gen-emoji-table.py emoji-data.txt """usage: ./gen-emoji-table.py emoji-data.txt emoji-test.txt
Input file: Input file:
* https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt * https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
* https://www.unicode.org/Public/emoji/latest/emoji-test.txt
""" """
import sys import sys
from collections import OrderedDict from collections import OrderedDict
import packTab import packTab
if len (sys.argv) != 2: if len (sys.argv) != 3:
sys.exit (__doc__) sys.exit (__doc__)
f = open(sys.argv[1]) f = open(sys.argv[1])
@ -74,3 +75,37 @@ print ()
print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */") print ("#endif /* HB_UNICODE_EMOJI_TABLE_HH */")
print () print ()
print ("/* == End of generated table == */") print ("/* == End of generated table == */")
# Generate test file.
#
# Reads the emoji sequences listed in the file given as the second command
# line argument and writes a shaping test that expects every multi-code-point
# sequence to come out as a single cluster.
sequences = []
# emoji-test.txt is UTF-8 and carries literal emoji in its comments, so decode
# it explicitly instead of relying on the locale's default encoding.
with open(sys.argv[2], encoding="utf-8") as f:
    for line in f:
        # Keep only the text before the first "#" and, of that, only the text
        # before the first ";"; what remains is the code point list.
        codepoints = line.partition("#")[0].partition(";")[0].split()
        # Skip blank/comment-only lines and single-code-point entries; only
        # multi-code-point sequences are written to the test file.
        if len(codepoints) < 2:
            continue
        sequences.append(codepoints)

# Split into number of sequences per line, too small number slows the test, and
# too big overwhelms the test runner.
CHUNK = 50

# NOTE: the output path is resolved relative to the current working directory.
with open("../test/shaping/data/in-house/tests/emoji-clusters.tests", "w") as f:
    for start in range(0, len(sequences), CHUNK):
        outputs = []
        inputs = []
        # Cluster values restart at zero on every test line.
        cluster = 0
        for sequence in sequences[start:start + CHUNK]:
            # One "1=<cluster>" entry per code point: each maps to glyph 1,
            # and all entries of a sequence carry the cluster value of the
            # sequence's first code point.
            outputs.append("|".join([f"1={cluster}"] * len(sequence)))
            inputs.append(",".join(sequence))
            cluster += len(sequence)
        # Test line format: font:options:input code points:expected glyphs.
        f.write("../fonts/AdobeBlank2.ttf:--no-glyph-names --no-positions --font-funcs=ot")
        f.write(":" + ",".join(inputs))
        f.write(":[" + "|".join(outputs) + "]\n")

View File

@ -16,6 +16,7 @@ TESTS = \
tests/default-ignorables.tests \ tests/default-ignorables.tests \
tests/digits.tests \ tests/digits.tests \
tests/emoji.tests \ tests/emoji.tests \
tests/emoji-clusters.tests \
tests/fallback-positioning.tests \ tests/fallback-positioning.tests \
tests/hangul-jamo.tests \ tests/hangul-jamo.tests \
tests/hyphens.tests \ tests/hyphens.tests \

Binary file not shown.

View File

@ -16,6 +16,7 @@ in_house_tests = [
'default-ignorables.tests', 'default-ignorables.tests',
'digits.tests', 'digits.tests',
'emoji.tests', 'emoji.tests',
'emoji-clusters.tests',
'fallback-positioning.tests', 'fallback-positioning.tests',
'hangul-jamo.tests', 'hangul-jamo.tests',
'hyphens.tests', 'hyphens.tests',

File diff suppressed because one or more lines are too long