Fix code point iteration in narrow Python

This commit is contained in:
David Corbett 2017-10-25 16:06:01 -04:00 committed by Behdad Esfahbod
parent 33ca3b67bf
commit d8df714251
1 changed file with 26 additions and 1 deletion

View File

@ -7,6 +7,9 @@ from itertools import *
# Module-level constants: a string of punctuation characters and a list of
# three color names.
# NOTE(review): presumably used as per-item markers/colors when rendering
# diffs; the code that consumes them is not visible here — confirm.
diff_symbols = "-+=*&^%$#@!~/"
diff_colors = ['red', 'green', 'blue']
def codepoints(s):
    """Return a lazy iterator over ``ord`` of each element of *s*.

    One integer is produced per element of *s*, in order; elements are
    never combined with one another.
    """
    return (ord (ch) for ch in s)
try:
unichr = unichr
@ -43,6 +46,28 @@ try:
except UnicodeDecodeError:
raise ValueError('unichr() arg not in range(0x110000)')
def codepoints(s):
    """Yield the code points of *s*, joining UTF-16 surrogate pairs.

    Each element of *s* is examined via ``ord``.  A high surrogate
    (0xD800-0xDBFF) immediately followed by a low surrogate
    (0xDC00-0xDFFF) is combined into a single supplementary code point.
    Any surrogate that is not part of such a pair is reported as 0xFFFC
    instead.  All other values are yielded unchanged.
    """
    pending = None  # high surrogate still waiting for its low half
    for ch in s:
        unit = ord (ch)
        is_low = 0xDC00 <= unit <= 0xDFFF

        if is_low and pending is not None:
            # Well-formed pair: decode it into one code point.
            yield 0x10000 + (pending - 0xD800) * 0x400 + (unit - 0xDC00)
            pending = None
            continue

        if is_low:
            # Low surrogate with no preceding high surrogate.
            yield 0xFFFC
            continue

        if pending is not None:
            # The previous high surrogate was not followed by a low one.
            yield 0xFFFC
            pending = None

        if 0xD800 <= unit <= 0xDBFF:
            # Hold the high surrogate until the next element is seen.
            pending = unit
        else:
            yield unit

    if pending is not None:
        # Input ended right after a high surrogate.
        yield 0xFFFC
except NameError:
unichr = chr
@ -456,7 +481,7 @@ class Unicode:
@staticmethod
def decode (s):
    """Format *s* as a comma-separated list of ``U+XXXX`` code points.

    *s* is first passed through ``tounicode (s, 'utf-8')`` (helper defined
    elsewhere in this file; presumably it decodes byte strings as UTF-8 —
    confirm).  The result is iterated with ``codepoints`` rather than
    calling ``ord`` on each element, so that on narrow Python builds a
    surrogate pair is reported as one code point instead of two halves.

    Returns a unicode string such as ``U+0061,U+1F600``; each value is
    upper-case hexadecimal, zero-padded to at least four digits.
    """
    # Only the codepoints()-based form is kept: the older per-element
    # ord() return preceded it and made this one unreachable.
    return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
@staticmethod
def parse (s):