[API] Add hb_buffer_add_latin1()

This is in no way meant to promote non-Unicode encodings. This is an entry point that takes Unicode codepoints that all happen to be among the first 256 characters and hence fit in 8-bit strings. This is useful, e.g., in Chrome, where strings that fit in 8 bits are implemented that way, and this avoids copying into UTF-8 or UTF-16. Perhaps we should rename this to hb_buffer_add_codepoints8(). I'm also curious whether anyone would really be interested in hb_buffer_add_codepoints16(). Please discuss!
2015-01-26 14:25:52 -08:00 · 2015-01-26 14:25:52 -08:00 · 61820bc4ca
parent 78c6e86c04
commit 61820bc4ca
3 changed files with 80 additions and 21 deletions
--- a/src/hb-buffer.cc
+++ b/src/hb-buffer.cc
@ -1328,15 +1328,15 @@ hb_buffer_guess_segment_properties (hb_buffer_t *buffer)
  buffer->guess_segment_properties ();
 }

-template <bool validate, typename T>
+template <typename utf_t>
 static inline void
 hb_buffer_add_utf (hb_buffer_t  *buffer,
-		   const T      *text,
+		   const typename utf_t::codepoint_t *text,
 		   int           text_length,
 		   unsigned int  item_offset,
 		   int           item_length)
 {
-  typedef hb_utf_t<T, validate> utf_t;
+  typedef typename utf_t::codepoint_t T;
  const hb_codepoint_t replacement = buffer->replacement;

  assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE ||
@ -1416,7 +1416,7 @@ hb_buffer_add_utf8 (hb_buffer_t  *buffer,
 		    unsigned int  item_offset,
 		    int           item_length)
 {
-  hb_buffer_add_utf<true> (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
+  hb_buffer_add_utf<hb_utf8_t> (buffer, (const uint8_t *) text, text_length, item_offset, item_length);
 }

 /**
@ -1438,7 +1438,7 @@ hb_buffer_add_utf16 (hb_buffer_t    *buffer,
 		     unsigned int    item_offset,
 		     int             item_length)
 {
-  hb_buffer_add_utf<true> (buffer, text, text_length, item_offset, item_length);
+  hb_buffer_add_utf<hb_utf16_t> (buffer, text, text_length, item_offset, item_length);
 }

 /**
@ -1460,7 +1460,29 @@ hb_buffer_add_utf32 (hb_buffer_t    *buffer,
 		     unsigned int    item_offset,
 		     int             item_length)
 {
-  hb_buffer_add_utf<true> (buffer, text, text_length, item_offset, item_length);
+  hb_buffer_add_utf<hb_utf32_t<> > (buffer, text, text_length, item_offset, item_length);
+}
+
+/**
+ * hb_buffer_add_latin1:
+ * @buffer: a buffer.
+ * @text: (array length=text_length) (element-type uint8_t): 8-bit text; each byte is read as one Unicode codepoint.
+ * @text_length: length argument for @text, passed through to hb_buffer_add_utf() (presumably -1 means NUL-terminated; confirm there).
+ * @item_offset: offset argument, passed through unchanged to hb_buffer_add_utf().
+ * @item_length: item length argument, passed through unchanged to hb_buffer_add_utf().
+ *
+ * Adds 8-bit text to @buffer via hb_buffer_add_utf<hb_latin1_t>(); only the first 256 Unicode codepoints can be expressed this way.
+ *
+ * Since: 1.0
+ **/
+void
+hb_buffer_add_latin1 (hb_buffer_t   *buffer,
+		      const uint8_t *text,
+		      int            text_length,
+		      unsigned int   item_offset,
+		      int            item_length)
+{
+  hb_buffer_add_utf<hb_latin1_t> (buffer, text, text_length, item_offset, item_length);
 }

 /**
@ -1482,7 +1504,7 @@ hb_buffer_add_codepoints (hb_buffer_t          *buffer,
 			  unsigned int          item_offset,
 			  int                   item_length)
 {
-  hb_buffer_add_utf<false> (buffer, text, text_length, item_offset, item_length);
+  hb_buffer_add_utf<hb_utf32_t<false> > (buffer, text, text_length, item_offset, item_length);
 }


--- a/src/hb-buffer.h
+++ b/src/hb-buffer.h
@ -253,6 +253,14 @@ hb_buffer_add_utf32 (hb_buffer_t    *buffer,
 		     unsigned int    item_offset,
 		     int             item_length);

+/* Only gives access to the first 256 Unicode codepoints (one byte per codepoint). */
+void
+hb_buffer_add_latin1 (hb_buffer_t   *buffer,
+		      const uint8_t *text,
+		      int            text_length,
+		      unsigned int   item_offset,
+		      int            item_length);
+
 /* Like add_utf32 but does NOT check for invalid Unicode codepoints. */
 void
 hb_buffer_add_codepoints (hb_buffer_t          *buffer,
--- a/src/hb-utf-private.hh
+++ b/src/hb-utf-private.hh
@ -29,14 +29,11 @@

 #include "hb-private.hh"

-template <typename T, bool validate=true> struct hb_utf_t;

-
-/* UTF-8 */
-
-template <>
-struct hb_utf_t<uint8_t, true>
+struct hb_utf8_t
 {
+  typedef uint8_t codepoint_t;
+
  static inline const uint8_t *
  next (const uint8_t *text,
 	const uint8_t *end,
@ -131,11 +128,10 @@ struct hb_utf_t<uint8_t, true>
 };


-/* UTF-16 */
-
-template <>
-struct hb_utf_t<uint16_t, true>
+struct hb_utf16_t
 {
+  typedef uint16_t codepoint_t;
+
  static inline const uint16_t *
  next (const uint16_t *text,
 	const uint16_t *end,
@ -204,11 +200,11 @@ struct hb_utf_t<uint16_t, true>
 };


-/* UTF-32 */
-
-template <bool validate>
-struct hb_utf_t<uint32_t, validate>
+template <bool validate=true>
+struct hb_utf32_t
 {
+  typedef uint32_t codepoint_t;
+
  static inline const uint32_t *
  next (const uint32_t *text,
 	const uint32_t *end HB_UNUSED,
@ -246,4 +242,37 @@ struct hb_utf_t<uint32_t, validate>
 };


+struct hb_latin1_t
+{
+  typedef uint8_t codepoint_t; /* One byte per codepoint: each byte value is used directly as the codepoint. */
+  /* next: store the byte at text in *unicode and return text advanced by one. No validation is done, so end and replacement are unused. */
+  static inline const uint8_t *
+  next (const uint8_t *text,
+	const uint8_t *end HB_UNUSED,
+	hb_codepoint_t *unicode,
+	hb_codepoint_t replacement HB_UNUSED)
+  {
+    *unicode = *text++;
+    return text;
+  }
+  /* prev: step text back by one, store that byte in *unicode and return the new position. start and replacement are unused, as in next(). */
+  static inline const uint8_t *
+  prev (const uint8_t *text,
+	const uint8_t *start HB_UNUSED,
+	hb_codepoint_t *unicode,
+	hb_codepoint_t replacement HB_UNUSED)
+  {
+    *unicode = *--text;
+    return text;
+  }
+  /* strlen: number of bytes before the first zero byte in text. */
+  static inline unsigned int
+  strlen (const uint8_t *text)
+  {
+    unsigned int l = 0;
+    while (*text++) l++;
+    return l;
+  }
+};
+
 #endif /* HB_UTF_PRIVATE_HH */