[name] Flesh out UTF-X to UTF-X conversion routines
This commit is contained in:
parent
84811a06a2
commit
5531bd068e
|
@ -51,6 +51,51 @@ hb_ot_name_get_names (hb_face_t *face,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename in_utf_t, typename out_utf_t>
|
||||||
|
static inline unsigned int
|
||||||
|
hb_ot_name_convert_utf (const hb_bytes_t *bytes,
|
||||||
|
unsigned int *text_size /* IN/OUT */,
|
||||||
|
typename out_utf_t::codepoint_t *text /* OUT */)
|
||||||
|
{
|
||||||
|
unsigned int src_len = bytes->len / sizeof (typename in_utf_t::codepoint_t);
|
||||||
|
const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes->arrayZ;
|
||||||
|
const typename in_utf_t::codepoint_t *src_end = src + src_len;
|
||||||
|
|
||||||
|
typename out_utf_t::codepoint_t *dst = text;
|
||||||
|
|
||||||
|
hb_codepoint_t unicode;
|
||||||
|
const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
|
||||||
|
|
||||||
|
if (text_size && *text_size)
|
||||||
|
{
|
||||||
|
(*text_size)--; /* Same room for NUL-termination. */
|
||||||
|
const typename out_utf_t::codepoint_t *dst_end = text + *text_size;
|
||||||
|
|
||||||
|
while (src < src_end && dst < dst_end)
|
||||||
|
{
|
||||||
|
const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement);
|
||||||
|
typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode);
|
||||||
|
if (dst_next == dst)
|
||||||
|
break; /* Out-of-room. */
|
||||||
|
|
||||||
|
dst = dst_next;
|
||||||
|
src = src_next;
|
||||||
|
};
|
||||||
|
|
||||||
|
*text_size = dst - text;
|
||||||
|
*text = 0; /* NUL-terminate. */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Accumulate length of rest. */
|
||||||
|
unsigned int dst_len = dst - text;
|
||||||
|
while (src < src_end)
|
||||||
|
{
|
||||||
|
src = in_utf_t::next (src, src_end, &unicode, replacement);
|
||||||
|
dst_len += out_utf_t::encode_len (unicode);
|
||||||
|
};
|
||||||
|
return dst_len;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename utf_t>
|
template <typename utf_t>
|
||||||
static inline unsigned int
|
static inline unsigned int
|
||||||
hb_ot_name_get_utf (hb_face_t *face,
|
hb_ot_name_get_utf (hb_face_t *face,
|
||||||
|
@ -63,22 +108,27 @@ hb_ot_name_get_utf (hb_face_t *face,
|
||||||
unsigned int idx = 0; // XXX bsearch and find
|
unsigned int idx = 0; // XXX bsearch and find
|
||||||
hb_bytes_t bytes = name.table->get_name (idx);
|
hb_bytes_t bytes = name.table->get_name (idx);
|
||||||
|
|
||||||
unsigned int full_length = 0;
|
if (true /*UTF16-BE*/)
|
||||||
const typename utf_t::codepoint_t *src = (const typename utf_t::codepoint_t *) bytes.arrayZ;
|
return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (&bytes, text_size, text);
|
||||||
unsigned int src_len = bytes.len / sizeof (typename utf_t::codepoint_t);
|
|
||||||
|
|
||||||
if (text_size && *text_size)
|
if (text_size)
|
||||||
{
|
{
|
||||||
*text_size--; /* Leave room for nul-termination. */
|
if (*text_size)
|
||||||
/* TODO Switch to walking string and validating. */
|
*text = 0;
|
||||||
memcpy (text,
|
*text_size = 0;
|
||||||
src,
|
|
||||||
MIN (*text_size, src_len) * sizeof (typename utf_t::codepoint_t));
|
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* Walk the rest, accumulate the full length. */
|
unsigned int
|
||||||
|
hb_ot_name_get_utf8 (hb_face_t *face,
|
||||||
return *text_size; //XXX
|
hb_name_id_t name_id,
|
||||||
|
hb_language_t language,
|
||||||
|
unsigned int *text_size /* IN/OUT */,
|
||||||
|
char *text /* OUT */)
|
||||||
|
{
|
||||||
|
return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size,
|
||||||
|
(hb_utf8_t::codepoint_t *) text);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int
|
unsigned int
|
||||||
|
@ -90,3 +140,13 @@ hb_ot_name_get_utf16 (hb_face_t *face,
|
||||||
{
|
{
|
||||||
return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
|
return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
hb_ot_name_get_utf32 (hb_face_t *face,
|
||||||
|
hb_name_id_t name_id,
|
||||||
|
hb_language_t language,
|
||||||
|
unsigned int *text_size /* IN/OUT */,
|
||||||
|
uint32_t *text /* OUT */)
|
||||||
|
{
|
||||||
|
return hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text);
|
||||||
|
}
|
||||||
|
|
|
@ -49,14 +49,12 @@ typedef unsigned int hb_name_id_t;
|
||||||
#define HB_NAME_ID_INVALID 0xFFFF
|
#define HB_NAME_ID_INVALID 0xFFFF
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
unsigned int
|
||||||
HB_EXTERN unsigned int
|
hb_ot_name_get_utf8 (hb_face_t *face,
|
||||||
Xhb_ot_name_get_utf8 (hb_face_t *face,
|
|
||||||
hb_name_id_t name_id,
|
hb_name_id_t name_id,
|
||||||
hb_language_t language,
|
hb_language_t language,
|
||||||
unsigned int *text_size /* IN/OUT */,
|
unsigned int *text_size /* IN/OUT */,
|
||||||
char *text /* OUT */);
|
char *text /* OUT */);
|
||||||
#endif
|
|
||||||
|
|
||||||
HB_EXTERN unsigned int
|
HB_EXTERN unsigned int
|
||||||
hb_ot_name_get_utf16 (hb_face_t *face,
|
hb_ot_name_get_utf16 (hb_face_t *face,
|
||||||
|
@ -65,14 +63,12 @@ hb_ot_name_get_utf16 (hb_face_t *face,
|
||||||
unsigned int *text_size /* IN/OUT */,
|
unsigned int *text_size /* IN/OUT */,
|
||||||
uint16_t *text /* OUT */);
|
uint16_t *text /* OUT */);
|
||||||
|
|
||||||
#if 0
|
|
||||||
HB_EXTERN unsigned int
|
HB_EXTERN unsigned int
|
||||||
Xhb_ot_name_get_utf32 (hb_face_t *face,
|
hb_ot_name_get_utf32 (hb_face_t *face,
|
||||||
hb_name_id_t name_id,
|
hb_name_id_t name_id,
|
||||||
hb_language_t language,
|
hb_language_t language,
|
||||||
unsigned int *text_size /* IN/OUT */,
|
unsigned int *text_size /* IN/OUT */,
|
||||||
uint32_t *text /* OUT */);
|
uint32_t *text /* OUT */);
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct hb_ot_name_entry_t
|
typedef struct hb_ot_name_entry_t
|
||||||
|
|
107
src/hb-utf.hh
107
src/hb-utf.hh
|
@ -127,6 +127,55 @@ struct hb_utf8_t
|
||||||
{
|
{
|
||||||
return ::strlen ((const char *) text);
|
return ::strlen ((const char *) text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
encode_len (hb_codepoint_t unicode)
|
||||||
|
{
|
||||||
|
if (unicode < 0x0080u) return 1;
|
||||||
|
if (unicode < 0x0800u) return 2;
|
||||||
|
if (unicode < 0x10000u) return 3;
|
||||||
|
if (unicode < 0x110000u) return 4;
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Write unicode as UTF-8 at text and return the advanced pointer.
 *
 * Surrogates (0xD800..0xDFFF) and values above 0x10FFFF are replaced by
 * 0xFFFD first.  Multi-byte sequences are written only if they fit entirely
 * before end; otherwise text is returned unchanged.  The single-byte case
 * is written without consulting end.
 */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* One byte: 0xxxxxxx. */
  if (unicode < 0x0080u)
  {
    *text++ = unicode;
    return text;
  }

  /* Two bytes: 110xxxxx 10xxxxxx. */
  if (unicode < 0x0800u)
  {
    if (end - text < 2)
      return text;
    *text++ = 0xC0u + (0x1Fu & (unicode >> 6));
    *text++ = 0x80u + (0x3Fu & (unicode ));
    return text;
  }

  /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
  if (unicode < 0x10000u)
  {
    if (end - text < 3)
      return text;
    *text++ = 0xE0u + (0x0Fu & (unicode >> 12));
    *text++ = 0x80u + (0x3Fu & (unicode >> 6));
    *text++ = 0x80u + (0x3Fu & (unicode ));
    return text;
  }

  /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
  if (end - text < 4)
    return text;
  *text++ = 0xF0u + (0x07u & (unicode >> 18));
  *text++ = 0x80u + (0x3Fu & (unicode >> 12));
  *text++ = 0x80u + (0x3Fu & (unicode >> 6));
  *text++ = 0x80u + (0x3Fu & (unicode ));
  return text;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -208,6 +257,30 @@ struct hb_utf16_xe_t
|
||||||
while (*text++) l++;
|
while (*text++) l++;
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
encode_len (hb_codepoint_t unicode)
|
||||||
|
{
|
||||||
|
return unicode < 0x10000 ? 1 : 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Write unicode as UTF-16 at text and return the advanced pointer.
 *
 * Surrogates (0xD800..0xDFFF) and values above 0x10FFFF are replaced by
 * 0xFFFD first.  A surrogate pair is written only if two units fit before
 * end; otherwise text is returned unchanged.  The single-unit case is
 * written without consulting end.
 */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end,
        hb_codepoint_t unicode)
{
  const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
  const bool beyond_unicode = unicode > 0x10FFFFu;
  if (unlikely (is_surrogate || beyond_unicode))
    unicode = 0xFFFDu;

  /* Fits in a single code unit. */
  if (unicode < 0x10000u)
  {
    *text++ = unicode;
    return text;
  }

  /* Needs a pair; bail out untouched when there is no room for both. */
  if (end - text < 2)
    return text;

  unicode -= 0x10000u;
  *text++ = 0xD800u + (unicode >> 10);      /* high surrogate: top 10 bits */
  *text++ = 0xDC00u + (unicode & 0x03FFu);  /* low surrogate: bottom 10 bits */
  return text;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
|
typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
|
||||||
|
@ -251,6 +324,23 @@ struct hb_utf32_xe_t
|
||||||
while (*text++) l++;
|
while (*text++) l++;
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
encode_len (hb_codepoint_t unicode HB_UNUSED)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Store unicode as a single code unit at text and return text + 1.
 *
 * When validate is true, surrogates (0xD800..0xDFFF) and values above
 * 0x10FFFF are replaced by 0xFFFD first.  end is not consulted.
 */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  if (validate)
  {
    const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
    const bool beyond_unicode = unicode > 0x10FFFFu;
    if (unlikely (is_surrogate || beyond_unicode))
      unicode = 0xFFFDu;
  }

  *text = unicode;
  return text + 1;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
|
typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
|
||||||
|
@ -289,6 +379,23 @@ struct hb_latin1_t
|
||||||
while (*text++) l++;
|
while (*text++) l++;
|
||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline unsigned int
|
||||||
|
encode_len (hb_codepoint_t unicode HB_UNUSED)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Store unicode as a single code unit at text and return text + 1.
 *
 * Values of 0x0100 and above are written as '?'.  end is not consulted.
 */
static inline codepoint_t *
encode (codepoint_t *text,
        const codepoint_t *end HB_UNUSED,
        hb_codepoint_t unicode)
{
  const hb_codepoint_t stored =
    unlikely (unicode >= 0x0100u) ? (hb_codepoint_t) '?' : unicode;

  *text = stored;
  return text + 1;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* HB_UTF_HH */
|
#endif /* HB_UTF_HH */
|
||||||
|
|
Loading…
Reference in New Issue