From 6f13b6d62daae4989e3cc2fe4b168e5c59650964 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod
Date: Thu, 10 Jul 2014 19:31:40 -0400
Subject: [PATCH] When parsing UTF-16, generate invalid codepoint for lonely low surrogate

Test passes now.
---
 src/hb-utf-private.hh | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh
index b9a6519d2..11f23cc93 100644
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@@ -121,20 +121,27 @@ hb_utf_next (const uint16_t *text,
 {
   hb_codepoint_t c = *text++;
 
-  if (unlikely (hb_in_range (c, 0xd800, 0xdbff)))
+  if (likely (!hb_in_range (c, 0xd800, 0xdfff)))
   {
-    /* high surrogate */
+    *unicode = c;
+    return text;
+  }
+
+  if (likely (hb_in_range (c, 0xd800, 0xdbff)))
+  {
+    /* High-surrogate in c */
     hb_codepoint_t l;
     if (text < end && ((l = *text), likely (hb_in_range (l, 0xdc00, 0xdfff))))
     {
-      /* low surrogate */
+      /* Low-surrogate in l */
       *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
       text++;
-    } else
-      *unicode = -1;
-  } else
-    *unicode = c;
+      return text;
+    }
+  }
 
+  /* Lonely / out-of-order surrogate. */
+  *unicode = -1;
   return text;
 }
 
@@ -145,20 +152,27 @@ hb_utf_prev (const uint16_t *text,
 {
   hb_codepoint_t c = *--text;
 
-  if (unlikely (hb_in_range (c, 0xdc00, 0xdfff)))
+  if (likely (!hb_in_range (c, 0xd800, 0xdfff)))
   {
-    /* low surrogate */
+    *unicode = c;
+    return text;
+  }
+
+  if (likely (hb_in_range (c, 0xdc00, 0xdfff)))
+  {
+    /* Low-surrogate in c */
     hb_codepoint_t h;
     if (start < text && ((h = *(text - 1)), likely (hb_in_range (h, 0xd800, 0xdbff))))
     {
-      /* high surrogate */
+      /* High-surrogate in h */
       *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
       text--;
-    } else
-      *unicode = -1;
-  } else
-    *unicode = c;
+      return text;
+    }
+  }
 
+  /* Lonely / out-of-order surrogate. */
+  *unicode = -1;
   return text;
 }
 