[name] Flesh out UTF-X to UTF-X conversion routines

2018-10-23 20:04:05 -07:00 · 2018-10-23 20:04:05 -07:00 · 5531bd068e
parent 84811a06a2
commit 5531bd068e
3 changed files with 182 additions and 19 deletions
--- a/src/hb-ot-name.cc
+++ b/src/hb-ot-name.cc
@ -51,6 +51,51 @@ hb_ot_name_get_names (hb_face_t                 *face,
 }


+/*
+ * Converts name-record bytes from the in_utf_t encoding to out_utf_t.
+ *
+ * bytes:     source bytes; trailing bytes that do not make up a whole
+ *            in_utf_t code unit are ignored.
+ * text_size: IN: capacity of text in out_utf_t code units, including room
+ *            for the terminating NUL.  OUT: number of code units written,
+ *            not counting the NUL.  If NULL or pointing to zero, nothing is
+ *            written and the value is left untouched.
+ * text:      output buffer.
+ *
+ * Returns the length, in out_utf_t code units, of the complete conversion
+ * (what was written plus what did not fit), not counting the NUL.
+ */
+template <typename in_utf_t, typename out_utf_t>
+static inline unsigned int
+hb_ot_name_convert_utf (const hb_bytes_t                *bytes,
+			unsigned int                    *text_size /* IN/OUT */,
+			typename out_utf_t::codepoint_t *text /* OUT */)
+{
+  unsigned int src_len = bytes->len / sizeof (typename in_utf_t::codepoint_t);
+  const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes->arrayZ;
+  const typename in_utf_t::codepoint_t *src_end = src + src_len;
+
+  typename out_utf_t::codepoint_t *dst = text;
+
+  hb_codepoint_t unicode;
+  const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT;
+
+  if (text_size && *text_size)
+  {
+    (*text_size)--; /* Save room for NUL-termination. */
+    const typename out_utf_t::codepoint_t *dst_end = text + *text_size;
+
+    while (src < src_end && dst < dst_end)
+    {
+      const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement);
+      typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode);
+      if (dst_next == dst)
+        break; /* Out-of-room. */
+
+      dst = dst_next;
+      src = src_next;
+    }
+
+    *text_size = dst - text;
+    /* NUL-terminate after the last unit written.  dst never passes dst_end,
+     * which sits one unit before the end of the caller's buffer. */
+    *dst = 0;
+  }
+
+  /* Accumulate length of rest.  src was not advanced past a character that
+   * failed to fit, so that character is counted here. */
+  unsigned int dst_len = dst - text;
+  while (src < src_end)
+  {
+    src = in_utf_t::next (src, src_end, &unicode, replacement);
+    dst_len += out_utf_t::encode_len (unicode);
+  }
+  return dst_len;
+}
+
 template <typename utf_t>
 static inline unsigned int
 hb_ot_name_get_utf (hb_face_t     *face,
@ -63,22 +108,27 @@ hb_ot_name_get_utf (hb_face_t     *face,
  unsigned int idx = 0; // XXX bsearch and find
  hb_bytes_t bytes = name.table->get_name (idx);

-  unsigned int full_length = 0;
-  const typename utf_t::codepoint_t *src = (const typename utf_t::codepoint_t *) bytes.arrayZ;
-  unsigned int src_len = bytes.len / sizeof (typename utf_t::codepoint_t);
+  if (true /*UTF16-BE*/)
+    return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (&bytes, text_size, text);

-  if (text_size && *text_size)
+  if (text_size)
  {
-    *text_size--; /* Leave room for nul-termination. */
-    /* TODO Switch to walking string and validating. */
-    memcpy (text,
-	    src,
-	    MIN (*text_size, src_len) * sizeof (typename utf_t::codepoint_t));
+    if (*text_size)
+      *text = 0;
+    *text_size = 0;
  }
+  return 0;
+}

-  /* Walk the rest, accumulate the full length. */
-
-  return *text_size; //XXX
+/* UTF-8 entry point: forwards to hb_ot_name_get_utf<hb_utf8_t>, handing it
+ * the caller's char buffer viewed as hb_utf8_t code units. */
+unsigned int
+hb_ot_name_get_utf8 (hb_face_t     *face,
+		     hb_name_id_t   name_id,
+		     hb_language_t  language,
+		     unsigned int  *text_size /* IN/OUT */,
+		     char          *text      /* OUT */)
+{
+  hb_utf8_t::codepoint_t *utf8_text = (hb_utf8_t::codepoint_t *) text;
+  return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size, utf8_text);
 }

 unsigned int
@ -90,3 +140,13 @@ hb_ot_name_get_utf16 (hb_face_t     *face,
 {
  return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text);
 }
+
+/* UTF-32 entry point: forwards all arguments unchanged to
+ * hb_ot_name_get_utf<hb_utf32_t> and returns its result. */
+unsigned int
+hb_ot_name_get_utf32 (hb_face_t     *face,
+		      hb_name_id_t   name_id,
+		      hb_language_t  language,
+		      unsigned int  *text_size /* IN/OUT */,
+		      uint32_t      *text      /* OUT */)
+{
+  const unsigned int result =
+    hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text);
+  return result;
+}
--- a/src/hb-ot-name.h
+++ b/src/hb-ot-name.h
@ -49,14 +49,12 @@ typedef unsigned int hb_name_id_t;
 #define HB_NAME_ID_INVALID 0xFFFF


-#if 0
-HB_EXTERN unsigned int
-Xhb_ot_name_get_utf8 (hb_face_t     *face,
+/* Marked HB_EXTERN to match the hb_ot_name_get_utf16/utf32 declarations. */
+HB_EXTERN unsigned int
+hb_ot_name_get_utf8 (hb_face_t     *face,
 		     hb_name_id_t   name_id,
 		     hb_language_t  language,
 		     unsigned int  *text_size /* IN/OUT */,
 		     char          *text      /* OUT */);
-#endif

 HB_EXTERN unsigned int
 hb_ot_name_get_utf16 (hb_face_t     *face,
@ -65,14 +63,12 @@ hb_ot_name_get_utf16 (hb_face_t     *face,
 		      unsigned int  *text_size /* IN/OUT */,
 		      uint16_t      *text      /* OUT */);

-#if 0
 HB_EXTERN unsigned int
-Xhb_ot_name_get_utf32 (hb_face_t     *face,
+hb_ot_name_get_utf32 (hb_face_t     *face,
 		      hb_name_id_t   name_id,
 		      hb_language_t  language,
 		      unsigned int  *text_size /* IN/OUT */,
 		      uint32_t      *text      /* OUT */);
-#endif


 typedef struct hb_ot_name_entry_t
--- a/src/hb-utf.hh
+++ b/src/hb-utf.hh
@ -127,6 +127,55 @@ struct hb_utf8_t
  {
    return ::strlen ((const char *) text);
  }
+
+  /* Number of UTF-8 bytes needed for `unicode`.  Values of 0x110000 and
+   * above count as 3 bytes, the size of the U+FFFD that encode() writes
+   * in their place. */
+  static inline unsigned int
+  encode_len (hb_codepoint_t unicode)
+  {
+    if (unicode >= 0x110000u) return 3;
+    if (unicode >=  0x10000u) return 4;
+    if (unicode >=   0x0800u) return 3;
+    if (unicode >=   0x0080u) return 2;
+    return 1;
+  }
+
+  /* Writes the UTF-8 form of `unicode` at `text` and returns the position
+   * just past the last byte written.  Surrogates (U+D800..U+DFFF) and values
+   * above U+10FFFF are written as U+FFFD.  A multi-byte sequence that does
+   * not fit before `end` is not written and `text` is returned unchanged.
+   * The one-byte case is written without consulting `end`. */
+  static inline codepoint_t *
+  encode (codepoint_t *text,
+	  const codepoint_t *end,
+	  hb_codepoint_t unicode)
+  {
+    const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
+    const bool out_of_range = unicode > 0x10FFFFu;
+    if (unlikely (is_surrogate || out_of_range))
+      unicode = 0xFFFDu;
+
+    if (unicode < 0x0080u)
+    {
+      *text++ = unicode;
+      return text;
+    }
+
+    /* Pick the sequence length and the lead byte. */
+    int n_bytes;
+    unsigned int lead;
+    if (unicode < 0x0800u)
+    {
+      n_bytes = 2;
+      lead = 0xC0u + (0x1Fu & (unicode >>  6));
+    }
+    else if (unicode < 0x10000u)
+    {
+      n_bytes = 3;
+      lead = 0xE0u + (0x0Fu & (unicode >> 12));
+    }
+    else
+    {
+      n_bytes = 4;
+      lead = 0xF0u + (0x07u & (unicode >> 18));
+    }
+
+    if (end - text < n_bytes)
+      return text; /* Out-of-room; nothing written. */
+
+    *text++ = lead;
+    /* Continuation bytes carry six payload bits each, highest first. */
+    for (int shift = 6 * (n_bytes - 2); shift >= 0; shift -= 6)
+      *text++ = 0x80u + (0x3Fu & (unicode >> shift));
+    return text;
+  }
 };


@ -208,6 +257,30 @@ struct hb_utf16_xe_t
    while (*text++) l++;
    return l;
  }
+
+  /* Number of UTF-16 code units encode() emits for `unicode`: two (a
+   * surrogate pair) for U+10000..U+10FFFF, one otherwise.  Values above
+   * U+10FFFF count as one because encode() writes them as U+FFFD. */
+  static inline unsigned int
+  encode_len (hb_codepoint_t unicode)
+  {
+    return unicode < 0x10000u || unicode > 0x10FFFFu ? 1 : 2;
+  }
+
+  /* Writes the UTF-16 form of `unicode` at `text` and returns the position
+   * just past the last unit written.  Surrogates (U+D800..U+DFFF) and values
+   * above U+10FFFF are written as U+FFFD.  A surrogate pair that does not
+   * fit before `end` is not written and `text` is returned unchanged.  The
+   * single-unit case is written without consulting `end`. */
+  static inline codepoint_t *
+  encode (codepoint_t *text,
+	  const codepoint_t *end,
+	  hb_codepoint_t unicode)
+  {
+    const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
+    const bool out_of_range = unicode > 0x10FFFFu;
+    if (unlikely (is_surrogate || out_of_range))
+      unicode = 0xFFFDu;
+
+    if (unicode < 0x10000u)
+    {
+      *text++ = unicode;
+      return text;
+    }
+
+    if (end - text < 2)
+      return text; /* Out-of-room; nothing written. */
+
+    /* Split the 20-bit offset into high and low surrogates. */
+    unicode -= 0x10000u;
+    *text++ =  0xD800u + (unicode >> 10);
+    *text++ =  0xDC00u + (unicode & 0x03FFu);
+    return text;
+  }
 };

 typedef hb_utf16_xe_t<uint16_t> hb_utf16_t;
@ -251,6 +324,23 @@ struct hb_utf32_xe_t
    while (*text++) l++;
    return l;
  }
+
+  /* Always 1: encode() in this struct writes exactly one code unit per
+   * code point, so the argument is not consulted. */
+  static inline unsigned int
+  encode_len (hb_codepoint_t unicode HB_UNUSED)
+  {
+    return 1;
+  }
+
+  /* Stores `unicode` as one code unit at `text` and returns the position
+   * after it.  When `validate` is set, surrogates (U+D800..U+DFFF) and
+   * values above U+10FFFF are stored as U+FFFD.  `end` is not consulted. */
+  static inline codepoint_t *
+  encode (codepoint_t *text,
+	  const codepoint_t *end HB_UNUSED,
+	  hb_codepoint_t unicode)
+  {
+    if (validate)
+    {
+      const bool is_surrogate = unicode >= 0xD800u && unicode <= 0xDFFFu;
+      const bool out_of_range = unicode > 0x10FFFFu;
+      if (unlikely (is_surrogate || out_of_range))
+        unicode = 0xFFFDu;
+    }
+    *text = unicode;
+    return text + 1;
+  }
 };

 typedef hb_utf32_xe_t<uint32_t> hb_utf32_t;
@ -289,6 +379,23 @@ struct hb_latin1_t
    while (*text++) l++;
    return l;
  }
+
+  /* Always 1: encode() in this struct writes exactly one code unit per
+   * code point, so the argument is not consulted. */
+  static inline unsigned int
+  encode_len (hb_codepoint_t unicode HB_UNUSED)
+  {
+    return 1;
+  }
+
+  /* Stores `unicode` as one code unit at `text` and returns the position
+   * after it.  Values of 0x0100 and above are stored as '?'.  `end` is not
+   * consulted. */
+  static inline codepoint_t *
+  encode (codepoint_t *text,
+	  const codepoint_t *end HB_UNUSED,
+	  hb_codepoint_t unicode)
+  {
+    const bool unrepresentable = unicode >= 0x0100u;
+    if (unlikely (unrepresentable))
+    {
+      *text = '?';
+      return text + 1;
+    }
+    *text = unicode;
+    return text + 1;
+  }
 };

 #endif /* HB_UTF_HH */