From d8df714251c61e13d7d12eb3f7393dd3a75a3719 Mon Sep 17 00:00:00 2001
From: David Corbett
Date: Wed, 25 Oct 2017 16:06:01 -0400
Subject: [PATCH] Fix code point iteration in narrow Python

---
 test/shaping/hb_test_tools.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/test/shaping/hb_test_tools.py b/test/shaping/hb_test_tools.py
index b9cb836a5..c9bb1dd33 100644
--- a/test/shaping/hb_test_tools.py
+++ b/test/shaping/hb_test_tools.py
@@ -7,6 +7,9 @@ from itertools import *
 diff_symbols = "-+=*&^%$#@!~/"
 diff_colors = ['red', 'green', 'blue']
 
+def codepoints(s):
+	return (ord (u) for u in s)
+
 try:
 	unichr = unichr
 
@@ -43,6 +46,28 @@ try:
 				except UnicodeDecodeError:
 					raise ValueError('unichr() arg not in range(0x110000)')
 
+		def codepoints(s):
+			high_surrogate = None
+			for u in s:
+				cp = ord (u)
+				if 0xDC00 <= cp <= 0xDFFF:
+					if high_surrogate:
+						yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
+						high_surrogate = None
+					else:
+						yield 0xFFFC
+				else:
+					if high_surrogate:
+						yield 0xFFFC
+						high_surrogate = None
+					if 0xD800 <= cp <= 0xDBFF:
+						high_surrogate = cp
+					else:
+						yield cp
+						high_surrogate = None
+			if high_surrogate:
+				yield 0xFFFC
+
 except NameError:
 	unichr = chr
 
@@ -456,7 +481,7 @@ class Unicode:
 
 	@staticmethod
 	def decode (s):
-		return u','.join ("U+%04X" % ord (u) for u in tounicode (s, 'utf-8'))
+		return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
 
 	@staticmethod
 	def parse (s):