diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh
index a77139f8e..8cde82747 100644
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@@ -72,6 +72,50 @@ hb_utf_next (const uint8_t *text,
   }
 }
 
+/* Decodes the UTF-8 sequence ending just before TEXT, looking no further back
+ * than START.  On success stores the code point in *UNICODE and returns a
+ * pointer to the sequence's first byte.  Otherwise stores -1 in *UNICODE and
+ * returns TEXT - 1, i.e. steps back over a single byte.
+ * NOTE(review): presumably callers guarantee start < text; with start == text
+ * the byte at TEXT is read and the returned pointer precedes START. */
+static inline const uint8_t *
+hb_utf_prev (const uint8_t *text,
+	     const uint8_t *start,
+	     hb_codepoint_t *unicode)
+{
+  const uint8_t *end = text;
+  /* Walk back over continuation bytes (10xxxxxx), at most four bytes in all.
+   * The condition already decrements TEXT, so the body must not step again;
+   * otherwise every second byte would be skipped unexamined. */
+  while (start < text && (*--text & 0xc0) == 0x80 && end - text < 4)
+    continue;
+
+  hb_codepoint_t c = *text, mask;
+  unsigned int len;
+
+  /* TODO check for overlong sequences? */
+
+  HB_UTF8_COMPUTE (c, mask, len);
+  /* Reject when LEN is zero or differs from the number of bytes walked over. */
+  if (unlikely (!len || (unsigned int) (end - text) != len)) {
+    *unicode = -1;
+    return end - 1;
+  } else {
+    hb_codepoint_t result;
+    unsigned int i;
+    result = c & mask;
+    /* Append the low six bits of each byte after the first. */
+    for (i = 1; i < len; i++)
+      {
+	result <<= 6;
+	result |= (text[i] & 0x3f);
+      }
+    *unicode = result;
+    return text;
+  }
+}
+
+
 static inline unsigned int
 hb_utf_strlen (const uint8_t *text)
 {
@@ -105,6 +149,35 @@ hb_utf_next (const uint16_t *text,
   return text;
 }
 
+/* Decodes the UTF-16 code point ending just before TEXT.  Stores it in
+ * *UNICODE and returns a pointer to its first code unit.  A low surrogate
+ * not preceded (at or after START) by a high surrogate yields -1 and steps
+ * back one unit.  TEXT[-1] is read unconditionally. */
+static inline const uint16_t *
+hb_utf_prev (const uint16_t *text,
+	     const uint16_t *start,
+	     hb_codepoint_t *unicode)
+{
+  hb_codepoint_t c = *--text;
+
+  if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
+  {
+    /* low surrogate */
+    hb_codepoint_t h;
+    if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
+    {
+      /* high surrogate */
+      *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
+      text--;
+    } else
+      *unicode = -1;
+  } else
+    *unicode = c;
+
+  return text;
+}
+
+
 static inline unsigned int
 hb_utf_strlen (const uint16_t *text)
 {
@@ -121,8 +194,19 @@ hb_utf_next (const uint32_t *text,
 	     const uint32_t *end,
 	     hb_codepoint_t *unicode)
 {
-  *unicode = *text;
-  return text + 1;
+  *unicode = *text++;
+  return text;
+}
+
+/* Stores the UTF-32 code unit immediately before TEXT in *UNICODE and
+ * returns a pointer to it.  START is unused. */
+static inline const uint32_t *
+hb_utf_prev (const uint32_t *text,
+	     const uint32_t *start,
+	     hb_codepoint_t *unicode)
+{
+  *unicode = *--text;
+  return text;
 }
 
 static inline unsigned int