[name] Flesh out UTF-X to UTF-X conversion routines
This commit is contained in:
parent
84811a06a2
commit
5531bd068e
|
@ -51,6 +51,51 @@ hb_ot_name_get_names (hb_face_t *face,
|
|||
}
|
||||
|
||||
|
||||
template <typename in_utf_t, typename out_utf_t>
|
||||
static inline unsigned int
|
||||
hb_ot_name_convert_utf (const hb_bytes_t *bytes,
|
||||
unsigned int *text_size /* IN/OUT */,
|
||||
typename out_utf_t::codepoint_t *text /* OUT */)
|
||||
{
|
||||
unsigned int src_len = bytes->len / sizeof (typename in_utf_t::codepoint_t);
|
||||
const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes->arrayZ;
|
||||
const typename in_utf_t::codepoint_t *src_end = src + src_len;
|
||||
|
||||
typename out_utf_t::codepoint_t *dst = text;
|
||||
|
||||
hb_codepoint_t unicode;
|
||||
const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
|
||||
|
||||
if (text_size && *text_size)
|
||||
{
|
||||
(*text_size)--; /* Same room for NUL-termination. */
|
||||
const typename out_utf_t::codepoint_t *dst_end = text + *text_size;
|
||||
|
||||
while (src < src_end && dst < dst_end)
|
||||
{
|
||||
const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement);
|
||||
typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode);
|
||||
if (dst_next == dst)
|
||||
break; /* Out-of-room. */
|
||||
|
||||
dst = dst_next;
|
||||
src = src_next;
|
||||
};
|
||||
|
||||
*text_size = dst - text;
|
||||
*text = 0; /* NUL-terminate. */
|
||||
}
|
||||
|
||||
/* Accumulate length of rest. */
|
||||
unsigned int dst_len = dst - text;
|
||||
while (src < src_end)
|
||||
{
|
||||
src = in_utf_t::next (src, src_end, &unicode, replacement);
|
||||
dst_len += out_utf_t::encode_len (unicode);
|
||||
};
|
||||
return dst_len;
|
||||
}
|
||||
|
||||
template <typename utf_t>
|
||||
static inline unsigned int
|
||||
hb_ot_name_get_utf (hb_face_t *face,
|
||||
|
@ -63,22 +108,27 @@ hb_ot_name_get_utf (hb_face_t *face,
|
|||
unsigned int idx = 0; // XXX bsearch and find
|
||||
hb_bytes_t bytes = name.table->get_name (idx);
|
||||
|
||||
unsigned int full_length = 0;
|
||||
const typename utf_t::codepoint_t *src = (const typename utf_t::codepoint_t *) bytes.arrayZ;
|
||||
unsigned int src_len = bytes.len / sizeof (typename utf_t::codepoint_t);
|
||||
if (true /*UTF16-BE*/)
|
||||
return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (&bytes, text_size, text);
|
||||
|
||||
if (text_size && *text_size)
|
||||
if (text_size)
|
||||
{
|
||||
*text_size--; /* Leave room for nul-termination. */
|
||||
/* TODO Switch to walking string and validating. */
|
||||
memcpy (text,
|
||||
src,
|
||||
MIN (*text_size, src_len) * sizeof (typename utf_t::codepoint_t));
|
||||
if (*text_size)
|
||||
*text = 0;
|
||||
*text_size = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Walk the rest, accumulate the full length. */
|
||||
|
||||
return *text_size; //XXX
|
||||
unsigned int
|
||||
hb_ot_name_get_utf8 (hb_face_t *face,
|
||||
hb_name_id_t name_id,
|
||||
hb_language_t language,
|
||||
unsigned int *text_size /* IN/OUT */,
|
||||
char *text /* OUT */)
|
||||
{
|
||||
return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size,
|
||||
(hb_utf8_t::codepoint_t *) text);
|
||||
}
|
||||
|
||||
unsigned int
|
||||
|
@ -90,3 +140,13 @@ hb_ot_name_get_utf16 (hb_face_t *face,
|
|||
{
|
||||
return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
|
||||
}
|
||||
|
||||
unsigned int
|
||||
hb_ot_name_get_utf32 (hb_face_t *face,
|
||||
hb_name_id_t name_id,
|
||||
hb_language_t language,
|
||||
unsigned int *text_size /* IN/OUT */,
|
||||
uint32_t *text /* OUT */)
|
||||
{
|
||||
return hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text);
|
||||
}
|
||||
|
|
|
@ -49,14 +49,12 @@ typedef unsigned int hb_name_id_t;
|
|||
#define HB_NAME_ID_INVALID 0xFFFF
|
||||
|
||||
|
||||
#if 0
|
||||
HB_EXTERN unsigned int
|
||||
Xhb_ot_name_get_utf8 (hb_face_t *face,
|
||||
unsigned int
|
||||
hb_ot_name_get_utf8 (hb_face_t *face,
|
||||
hb_name_id_t name_id,
|
||||
hb_language_t language,
|
||||
unsigned int *text_size /* IN/OUT */,
|
||||
char *text /* OUT */);
|
||||
#endif
|
||||
|
||||
HB_EXTERN unsigned int
|
||||
hb_ot_name_get_utf16 (hb_face_t *face,
|
||||
|
@ -65,14 +63,12 @@ hb_ot_name_get_utf16 (hb_face_t *face,
|
|||
unsigned int *text_size /* IN/OUT */,
|
||||
uint16_t *text /* OUT */);
|
||||
|
||||
#if 0
|
||||
HB_EXTERN unsigned int
|
||||
Xhb_ot_name_get_utf32 (hb_face_t *face,
|
||||
hb_ot_name_get_utf32 (hb_face_t *face,
|
||||
hb_name_id_t name_id,
|
||||
hb_language_t language,
|
||||
unsigned int *text_size /* IN/OUT */,
|
||||
uint32_t *text /* OUT */);
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct hb_ot_name_entry_t
|
||||
|
|
107
src/hb-utf.hh
107
src/hb-utf.hh
|
@ -127,6 +127,55 @@ struct hb_utf8_t
|
|||
{
|
||||
return ::strlen ((const char *) text);
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
encode_len (hb_codepoint_t unicode)
|
||||
{
|
||||
if (unicode < 0x0080u) return 1;
|
||||
if (unicode < 0x0800u) return 2;
|
||||
if (unicode < 0x10000u) return 3;
|
||||
if (unicode < 0x110000u) return 4;
|
||||
return 3;
|
||||
}
|
||||
|
||||
/* Writes @unicode as UTF-8 at @text and returns the position just past the
 * last unit written.
 *
 * Surrogates (U+D800..U+DFFF) and values above U+10FFFF are replaced by
 * U+FFFD before encoding.  Multi-unit sequences are written only when the
 * whole sequence fits before @end; otherwise nothing is written and @text is
 * returned unchanged.  A single-unit (below 0x80) value is stored without
 * consulting @end, so the caller must guarantee text < end. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* One unit: stored unconditionally. */
  if (unicode < 0x0080u)
  {
    *text = unicode;
    return text + 1;
  }

  /* Two units. */
  if (unicode < 0x0800u)
  {
    if (end - text < 2)
      return text;
    text[0] = 0xC0u + (0x1Fu & (unicode >> 6));
    text[1] = 0x80u + (0x3Fu & (unicode     ));
    return text + 2;
  }

  /* Three units. */
  if (unicode < 0x10000u)
  {
    if (end - text < 3)
      return text;
    text[0] = 0xE0u + (0x0Fu & (unicode >> 12));
    text[1] = 0x80u + (0x3Fu & (unicode >>  6));
    text[2] = 0x80u + (0x3Fu & (unicode      ));
    return text + 3;
  }

  /* Four units. */
  if (end - text < 4)
    return text;
  text[0] = 0xF0u + (0x07u & (unicode >> 18));
  text[1] = 0x80u + (0x3Fu & (unicode >> 12));
  text[2] = 0x80u + (0x3Fu & (unicode >>  6));
  text[3] = 0x80u + (0x3Fu & (unicode      ));
  return text + 4;
}
|
||||
};
|
||||
|
||||
|
||||
|
@ -208,6 +257,30 @@ struct hb_utf16_xe_t
|
|||
while (*text++) l++;
|
||||
return l;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
encode_len (hb_codepoint_t unicode)
|
||||
{
|
||||
return unicode < 0x10000 ? 1 : 2;
|
||||
}
|
||||
|
||||
/* Writes @unicode as UTF-16 at @text and returns the position just past the
 * last unit written.
 *
 * Surrogates (U+D800..U+DFFF) and values above U+10FFFF are replaced by
 * U+FFFD before encoding.  A surrogate pair is written only when two units
 * fit before @end; otherwise nothing is written and @text is returned
 * unchanged.  A single-unit value is stored without consulting @end, so the
 * caller must guarantee text < end. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* Below 0x10000: one unit, stored unconditionally. */
  if (unicode < 0x10000u)
  {
    *text = unicode;
    return text + 1;
  }

  /* 0x10000 and above: needs room for a high/low surrogate pair. */
  if (end - text < 2)
    return text;

  const hb_codepoint_t offset = unicode - 0x10000u;
  text[0] = 0xD800u + (offset >> 10);
  text[1] = 0xDC00u + (offset & 0x03FFu);
  return text + 2;
}
|
||||
};
|
||||
|
||||
typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
|
||||
|
@ -251,6 +324,23 @@ struct hb_utf32_xe_t
|
|||
while (*text++) l++;
|
||||
return l;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
encode_len (hb_codepoint_t unicode HB_UNUSED)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Stores @unicode as one UTF-32 unit at @text and returns text + 1.
 *
 * When validate is true, surrogates (U+D800..U+DFFF) and values above
 * U+10FFFF are replaced by U+FFFD first; otherwise the value is stored
 * as given.  @end is not consulted, so the caller must guarantee room for
 * one unit. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  if (validate)
  {
    const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
    const bool beyond_unicode = unicode > 0x10FFFFu;
    if (unlikely (is_surrogate || beyond_unicode))
      unicode = 0xFFFDu;
  }

  *text = unicode;
  return text + 1;
}
|
||||
};
|
||||
|
||||
typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
|
||||
|
@ -289,6 +379,23 @@ struct hb_latin1_t
|
|||
while (*text++) l++;
|
||||
return l;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
encode_len (hb_codepoint_t unicode HB_UNUSED)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Stores @unicode as one Latin-1 unit at @text and returns text + 1.
 *
 * Values of 0x100 and above are stored as '?'.  @end is not consulted, so
 * the caller must guarantee room for one unit. */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  const bool representable = !unlikely (unicode >= 0x0100u);
  if (!representable)
    unicode = '?';

  *text = unicode;
  return text + 1;
}
|
||||
};
|
||||
|
||||
#endif /* HB_UTF_HH */
|
||||
|
|
Loading…
Reference in New Issue