diff --git a/src/Makefile.sources b/src/Makefile.sources index c698f215b..79a47a396 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -32,6 +32,7 @@ HB_BASE_sources = \ hb-cff2-interp-cs.hh \ hb-common.cc \ hb-debug.hh \ + hb-dispatch.hh \ hb-face.cc \ hb-face.hh \ hb-font.cc \ @@ -131,6 +132,8 @@ HB_BASE_sources = \ hb-ot-var-gvar-table.hh \ hb-ot-var.cc \ hb-ot-vorg-table.hh \ + hb-sanitize.hh \ + hb-serialize.hh \ hb-set-digest.hh \ hb-set.cc \ hb-set.hh \ diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 3eb01d8be..128e49036 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -34,6 +34,19 @@ static const struct { + /* Don't know how to set priority of following. Doesn't work right now. */ + //template + //uint32_t operator () (const T& v) const + //{ return hb_deref_pointer (v).hash (); } + /* Instead, the following ugly solution: */ + template + uint32_t operator () (T&& v) const { return v.hash (); } + + template + uint32_t operator () (const T *v) const + { return hb_hash (*v); /* Hash the pointee: passing v itself would hand the same pointer straight back to hb_hash. */ } + template uint32_t operator () (T v) const @@ -41,11 +54,6 @@ static const struct /* Knuth's multiplicative method: */ return (uint32_t) v * 2654435761u; } - template - uint32_t operator () (T *v) const { return hb_hash (*v); } - template - uint32_t operator () (T&& v) const { return v.hash (); } } hb_hash HB_UNUSED; static const struct diff --git a/src/hb-dispatch.hh b/src/hb-dispatch.hh new file mode 100644 index 000000000..c4347a6ca --- /dev/null +++ b/src/hb-dispatch.hh @@ -0,0 +1,50 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. 
+ * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_DISPATCH_HH +#define HB_DISPATCH_HH + +#include "hb.hh" + +/* + * Dispatch + */ + +template +struct hb_dispatch_context_t +{ + static constexpr unsigned max_debug_depth = MaxDebugDepth; + typedef Return return_t; + template + bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED) { return true; } + static return_t no_dispatch_return_value () { return Context::default_return_value (); } + static bool stop_sublookup_iteration (const return_t r HB_UNUSED) { return false; } +}; + + +#endif /* HB_DISPATCH_HH */ diff --git a/src/hb-machinery.hh b/src/hb-machinery.hh index 89ad8ac63..095462eab 100644 --- a/src/hb-machinery.hh +++ b/src/hb-machinery.hh @@ -32,8 +32,9 @@ #include "hb.hh" #include "hb-blob.hh" -#include "hb-array.hh" -#include "hb-vector.hh" +#include "hb-dispatch.hh" +#include "hb-sanitize.hh" +#include "hb-serialize.hh" /* @@ -143,535 +144,6 @@ static inline Type& StructAfter(TObject &X) DEFINE_SIZE_ARRAY(size, array) -/* - * Dispatch - */ - -template -struct hb_dispatch_context_t -{ - static constexpr unsigned max_debug_depth = MaxDebugDepth; - typedef Return return_t; - template - bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED) { return true; } - static return_t 
no_dispatch_return_value () { return Context::default_return_value (); } - static bool stop_sublookup_iteration (const return_t r HB_UNUSED) { return false; } -}; - - -/* - * Sanitize - * - * - * === Introduction === - * - * The sanitize machinery is at the core of our zero-cost font loading. We - * mmap() font file into memory and create a blob out of it. Font subtables - * are returned as a readonly sub-blob of the main font blob. These table - * blobs are then sanitized before use, to ensure invalid memory access does - * not happen. The toplevel sanitize API use is like, eg. to load the 'head' - * table: - * - * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table (face); - * - * The blob then can be converted to a head table struct with: - * - * const head *head_table = head_blob->as (); - * - * What the reference_table does is, to call hb_face_reference_table() to load - * the table blob, sanitize it and return either the sanitized blob, or empty - * blob if sanitization failed. The blob->as() function returns the null - * object of its template type argument if the blob is empty. Otherwise, it - * just casts the blob contents to the desired type. - * - * Sanitizing a blob of data with a type T works as follows (with minor - * simplification): - * - * - Cast blob content to T*, call sanitize() method of it, - * - If sanitize succeeded, return blob. - * - Otherwise, if blob is not writable, try making it writable, - * or copy if cannot be made writable in-place, - * - Call sanitize() again. Return blob if sanitize succeeded. - * - Return empty blob otherwise. - * - * - * === The sanitize() contract === - * - * The sanitize() method of each object type shall return true if it's safe to - * call other methods of the object, and false otherwise. - * - * Note that what sanitize() checks for might align with what the specification - * describes as valid table data, but does not have to be. 
In particular, we - * do NOT want to be pedantic and concern ourselves with validity checks that - * are irrelevant to our use of the table. On the contrary, we want to be - * lenient with error handling and accept invalid data to the extent that it - * does not impose extra burden on us. - * - * Based on the sanitize contract, one can see that what we check for depends - * on how we use the data in other table methods. Ie. if other table methods - * assume that offsets do NOT point out of the table data block, then that's - * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On - * the other hand, if other methods do such checks themselves, then sanitize() - * does not have to bother with them (glyf/local work this way). The choice - * depends on the table structure and sanitize() performance. For example, to - * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard - * to avoid such costs during font loading. By postponing such checks to the - * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime - * cost to O(used-glyphs). As such, this is preferred. - * - * The same argument can be made re GSUB/GPOS/GDEF, but there, the table - * structure is so complicated that by checking all offsets at sanitize() time, - * we make the code much simpler in other methods, as offsets and referenced - * objects do not need to be validated at each use site. - */ - -/* This limits sanitizing time on really broken fonts. 
*/ -#ifndef HB_SANITIZE_MAX_EDITS -#define HB_SANITIZE_MAX_EDITS 32 -#endif -#ifndef HB_SANITIZE_MAX_OPS_FACTOR -#define HB_SANITIZE_MAX_OPS_FACTOR 8 -#endif -#ifndef HB_SANITIZE_MAX_OPS_MIN -#define HB_SANITIZE_MAX_OPS_MIN 16384 -#endif -#ifndef HB_SANITIZE_MAX_OPS_MAX -#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF -#endif - -struct hb_sanitize_context_t : - hb_dispatch_context_t -{ - hb_sanitize_context_t () : - debug_depth (0), - start (nullptr), end (nullptr), - max_ops (0), - writable (false), edit_count (0), - blob (nullptr), - num_glyphs (65536), - num_glyphs_set (false) {} - - const char *get_name () { return "SANITIZE"; } - template - bool may_dispatch (const T *obj HB_UNUSED, const F *format) - { return format->sanitize (this); } - template - return_t dispatch (const T &obj) { return obj.sanitize (this); } - static return_t default_return_value () { return true; } - static return_t no_dispatch_return_value () { return false; } - bool stop_sublookup_iteration (const return_t r) const { return !r; } - - void init (hb_blob_t *b) - { - this->blob = hb_blob_reference (b); - this->writable = false; - } - - void set_num_glyphs (unsigned int num_glyphs_) - { - num_glyphs = num_glyphs_; - num_glyphs_set = true; - } - unsigned int get_num_glyphs () { return num_glyphs; } - - void set_max_ops (int max_ops_) { max_ops = max_ops_; } - - template - void set_object (const T *obj) - { - reset_object (); - - if (!obj) return; - - const char *obj_start = (const char *) obj; - if (unlikely (obj_start < this->start || this->end <= obj_start)) - this->start = this->end = nullptr; - else - { - this->start = obj_start; - this->end = obj_start + MIN (this->end - obj_start, obj->get_size ()); - } - } - - void reset_object () - { - this->start = this->blob->data; - this->end = this->start + this->blob->length; - assert (this->start <= this->end); /* Must not overflow. 
*/ - } - - void start_processing () - { - reset_object (); - this->max_ops = MAX ((unsigned int) (this->end - this->start) * HB_SANITIZE_MAX_OPS_FACTOR, - (unsigned) HB_SANITIZE_MAX_OPS_MIN); - this->edit_count = 0; - this->debug_depth = 0; - - DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, - "start [%p..%p] (%lu bytes)", - this->start, this->end, - (unsigned long) (this->end - this->start)); - } - - void end_processing () - { - DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1, - "end [%p..%p] %u edit requests", - this->start, this->end, this->edit_count); - - hb_blob_destroy (this->blob); - this->blob = nullptr; - this->start = this->end = nullptr; - } - - bool check_range (const void *base, - unsigned int len) const - { - const char *p = (const char *) base; - bool ok = !len || - (this->start <= p && - p <= this->end && - (unsigned int) (this->end - p) >= len && - this->max_ops-- > 0); - - DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, - "check_range [%p..%p]" - " (%d bytes) in [%p..%p] -> %s", - p, p + len, len, - this->start, this->end, - ok ? 
"OK" : "OUT-OF-RANGE"); - - return likely (ok); - } - - template - bool check_range (const T *base, - unsigned int a, - unsigned int b) const - { - return !hb_unsigned_mul_overflows (a, b) && - this->check_range (base, a * b); - } - - template - bool check_range (const T *base, - unsigned int a, - unsigned int b, - unsigned int c) const - { - return !hb_unsigned_mul_overflows (a, b) && - this->check_range (base, a * b, c); - } - - template - bool check_array (const T *base, unsigned int len) const - { - return this->check_range (base, len, hb_static_size (T)); - } - - template - bool check_array (const T *base, - unsigned int a, - unsigned int b) const - { - return this->check_range (base, a, b, hb_static_size (T)); - } - - template - bool check_struct (const Type *obj) const - { return likely (this->check_range (obj, obj->min_size)); } - - bool may_edit (const void *base, unsigned int len) - { - if (this->edit_count >= HB_SANITIZE_MAX_EDITS) - return false; - - const char *p = (const char *) base; - this->edit_count++; - - DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, - "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s", - this->edit_count, - p, p + len, len, - this->start, this->end, - this->writable ? 
"GRANTED" : "DENIED"); - - return this->writable; - } - - template - bool try_set (const Type *obj, const ValueType &v) - { - if (this->may_edit (obj, hb_static_size (Type))) - { - * const_cast (obj) = v; - return true; - } - return false; - } - - template - hb_blob_t *sanitize_blob (hb_blob_t *blob) - { - bool sane; - - init (blob); - - retry: - DEBUG_MSG_FUNC (SANITIZE, start, "start"); - - start_processing (); - - if (unlikely (!start)) - { - end_processing (); - return blob; - } - - Type *t = CastP (const_cast (start)); - - sane = t->sanitize (this); - if (sane) - { - if (edit_count) - { - DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count); - - /* sanitize again to ensure no toe-stepping */ - edit_count = 0; - sane = t->sanitize (this); - if (edit_count) { - DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count); - sane = false; - } - } - } - else - { - if (edit_count && !writable) { - start = hb_blob_get_data_writable (blob, nullptr); - end = start + blob->length; - - if (start) - { - writable = true; - /* ok, we made it writable by relocating. try again */ - DEBUG_MSG_FUNC (SANITIZE, start, "retry"); - goto retry; - } - } - } - - end_processing (); - - DEBUG_MSG_FUNC (SANITIZE, start, sane ? 
"PASSED" : "FAILED"); - if (sane) - { - hb_blob_make_immutable (blob); - return blob; - } - else - { - hb_blob_destroy (blob); - return hb_blob_get_empty (); - } - } - - template - hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag) - { - if (!num_glyphs_set) - set_num_glyphs (hb_face_get_glyph_count (face)); - return sanitize_blob (hb_face_reference_table (face, tableTag)); - } - - mutable unsigned int debug_depth; - const char *start, *end; - mutable int max_ops; - private: - bool writable; - unsigned int edit_count; - hb_blob_t *blob; - unsigned int num_glyphs; - bool num_glyphs_set; -}; - -struct hb_sanitize_with_object_t -{ - template - hb_sanitize_with_object_t (hb_sanitize_context_t *c, - const T& obj) : c (c) - { c->set_object (obj); } - ~hb_sanitize_with_object_t () - { c->reset_object (); } - - private: - hb_sanitize_context_t *c; -}; - - -/* - * Serialize - */ - -struct hb_serialize_context_t -{ - hb_serialize_context_t (void *start_, unsigned int size) - { - this->start = (char *) start_; - this->end = this->start + size; - reset (); - } - - bool in_error () const { return !this->successful; } - - void reset () - { - this->successful = true; - this->head = this->start; - this->debug_depth = 0; - } - - bool propagate_error (bool e) - { return this->successful = this->successful && e; } - template bool propagate_error (const T &obj) - { return this->successful = this->successful && !obj.in_error (); } - template bool propagate_error (const T *obj) - { return this->successful = this->successful && !obj->in_error (); } - template bool propagate_error (T1 &o1, T2 &o2) - { return propagate_error (o1) && propagate_error (o2); } - template bool propagate_error (T1 *o1, T2 *o2) - { return propagate_error (o1) && propagate_error (o2); } - template - bool propagate_error (T1 &o1, T2 &o2, T3 &o3) - { return propagate_error (o1) && propagate_error (o2, o3); } - template - bool propagate_error (T1 *o1, T2 *o2, T3 *o3) - { return 
propagate_error (o1) && propagate_error (o2, o3); } - - /* To be called around main operation. */ - template - Type *start_serialize () - { - DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1, - "start [%p..%p] (%lu bytes)", - this->start, this->end, - (unsigned long) (this->end - this->start)); - - return start_embed (); - } - void end_serialize () - { - DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1, - "end [%p..%p] serialized %d bytes; %s", - this->start, this->end, - (int) (this->head - this->start), - this->successful ? "successful" : "UNSUCCESSFUL"); - } - - unsigned int length () const { return this->head - this->start; } - - void align (unsigned int alignment) - { - unsigned int l = length () % alignment; - if (l) - allocate_size (alignment - l); - } - - template - Type *start_embed (const Type *_ HB_UNUSED = nullptr) const - { - Type *ret = reinterpret_cast (this->head); - return ret; - } - - template - Type *allocate_size (unsigned int size) - { - if (unlikely (!this->successful || this->end - this->head < ptrdiff_t (size))) { - this->successful = false; - return nullptr; - } - memset (this->head, 0, size); - char *ret = this->head; - this->head += size; - return reinterpret_cast (ret); - } - - template - Type *allocate_min () - { - return this->allocate_size (Type::min_size); - } - - template - Type *embed (const Type &obj) - { - unsigned int size = obj.get_size (); - Type *ret = this->allocate_size (size); - if (unlikely (!ret)) return nullptr; - memcpy (ret, &obj, size); - return ret; - } - template - hb_serialize_context_t &operator << (const Type &obj) { embed (obj); return *this; } - - template - Type *extend_size (Type &obj, unsigned int size) - { - assert (this->start <= (char *) &obj); - assert ((char *) &obj <= this->head); - assert ((char *) &obj + size >= this->head); - if (unlikely (!this->allocate_size (((char *) &obj) + size - this->head))) return nullptr; - return reinterpret_cast (&obj); - } - - template - Type *extend_min (Type &obj) { 
return extend_size (obj, obj.min_size); } - - template - Type *extend (Type &obj) { return extend_size (obj, obj.get_size ()); } - - /* Output routines. */ - template - Type *copy () const - { - assert (this->successful); - unsigned int len = this->head - this->start; - void *p = malloc (len); - if (p) - memcpy (p, this->start, len); - return reinterpret_cast (p); - } - hb_bytes_t copy_bytes () const - { - assert (this->successful); - unsigned int len = this->head - this->start; - void *p = malloc (len); - if (p) - memcpy (p, this->start, len); - else - return hb_bytes_t (); - return hb_bytes_t ((char *) p, len); - } - hb_blob_t *copy_blob () const - { - assert (this->successful); - return hb_blob_create (this->start, - this->head - this->start, - HB_MEMORY_MODE_DUPLICATE, - nullptr, nullptr); - } - - public: - unsigned int debug_depth; - char *start, *end, *head; - bool successful; -}; - - - /* * Big-endian integers. */ diff --git a/src/hb-map.hh b/src/hb-map.hh index f2a65a821..d36515a74 100644 --- a/src/hb-map.hh +++ b/src/hb-map.hh @@ -31,22 +31,30 @@ /* - * hb_map_t + * hb_hashmap_t */ -struct hb_map_t +template +struct hb_hashmap_t { - HB_NO_COPY_ASSIGN (hb_map_t); - hb_map_t () { init (); } - ~hb_map_t () { fini (); } + HB_NO_COPY_ASSIGN (hb_hashmap_t); + hb_hashmap_t () { init (); } + ~hb_hashmap_t () { fini (); } + + static_assert (hb_is_integer (K) || hb_is_pointer (K), ""); + static_assert (hb_is_integer (V) || hb_is_pointer (V), ""); struct item_t { - hb_codepoint_t key; - hb_codepoint_t value; + K key; + V value; - bool is_unused () const { return key == INVALID; } - bool is_tombstone () const { return key != INVALID && value == INVALID; } + bool operator== (K o) { return hb_deref_pointer (key) == hb_deref_pointer (o); } + bool operator== (const item_t &o) { return *this == o.key; } + bool is_unused () const { return key == kINVALID; } + bool is_tombstone () const { return key != kINVALID && value == vINVALID; } }; hb_object_header_t header; @@ -110,7 
+118,7 @@ struct hb_map_t /* Insert back old items. */ if (old_items) for (unsigned int i = 0; i < old_size; i++) - if (old_items[i].key != INVALID && old_items[i].value != INVALID) + if (old_items[i].key != kINVALID && old_items[i].value != vINVALID) set (old_items[i].key, old_items[i].value); free (old_items); @@ -118,14 +126,14 @@ struct hb_map_t return true; } - void set (hb_codepoint_t key, hb_codepoint_t value) + void set (K key, V value) { if (unlikely (!successful)) return; - if (unlikely (key == INVALID)) return; + if (unlikely (key == kINVALID)) return; if ((occupancy + occupancy / 2) >= mask && !resize ()) return; unsigned int i = bucket_for (key); - if (value == INVALID && items[i].key != key) + if (value == vINVALID && items[i].key != key) return; /* Trying to delete non-existent key. */ if (!items[i].is_unused ()) @@ -143,24 +151,22 @@ struct hb_map_t population++; } - hb_codepoint_t get (hb_codepoint_t key) const + V get (K key) const { - if (unlikely (!items)) return INVALID; + if (unlikely (!items)) return vINVALID; unsigned int i = bucket_for (key); - return items[i].key == key ? items[i].value : INVALID; + return items[i] == key ? items[i].value : vINVALID; } - void del (hb_codepoint_t key) { set (key, INVALID); } - - static constexpr hb_codepoint_t INVALID = HB_MAP_VALUE_INVALID; + void del (K key) { set (key, vINVALID); } /* Has interface. */ - static constexpr hb_codepoint_t SENTINEL = INVALID; - typedef hb_codepoint_t value_t; - value_t operator [] (hb_codepoint_t k) const { return get (k); } - bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; } + static constexpr V SENTINEL = vINVALID; + typedef V value_t; + value_t operator [] (K k) const { return get (k); } + bool has (K k) const { return (*this)[k] != SENTINEL; } /* Projection. 
*/ - hb_codepoint_t operator () (hb_codepoint_t k) const { return get (k); } + V operator () (K k) const { return get (k); } void clear () { @@ -174,20 +180,20 @@ struct hb_map_t protected: - unsigned int bucket_for (hb_codepoint_t key) const + unsigned int bucket_for (K key) const { unsigned int i = hb_hash (key) % prime; unsigned int step = 0; - unsigned int tombstone = INVALID; + unsigned int tombstone = (unsigned) -1; while (!items[i].is_unused ()) { - if (items[i].key == key) + if (items[i] == key) return i; - if (tombstone == INVALID && items[i].is_tombstone ()) + if (tombstone == (unsigned) -1 && items[i].is_tombstone ()) tombstone = i; i = (i + ++step) & mask; } - return tombstone == INVALID ? i : tombstone; + return tombstone == (unsigned) -1 ? i : tombstone; } static unsigned int prime_for (unsigned int shift) @@ -242,5 +248,10 @@ struct hb_map_t } }; +struct hb_map_t : hb_hashmap_t {}; + #endif /* HB_MAP_HH */ diff --git a/src/hb-meta.hh b/src/hb-meta.hh index 6d9dd6b6c..a303c67a7 100644 --- a/src/hb-meta.hh +++ b/src/hb-meta.hh @@ -63,6 +63,14 @@ template struct hb_match_pointer { typedef T type; enum { valu #define hb_remove_pointer(T) typename hb_match_pointer::type #define hb_is_pointer(T) hb_match_pointer::value +static const struct +{ + template + T operator () (T v) const { return v; } + template + T& operator () (T *v) const { return *v; } +} hb_deref_pointer HB_UNUSED; + /* Void! For when we need a expression-type of void. */ struct hb_void_t { typedef void value; }; diff --git a/src/hb-sanitize.hh b/src/hb-sanitize.hh new file mode 100644 index 000000000..e58202cf7 --- /dev/null +++ b/src/hb-sanitize.hh @@ -0,0 +1,388 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. 
+ * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SANITIZE_HH +#define HB_SANITIZE_HH + +#include "hb.hh" +#include "hb-blob.hh" +#include "hb-dispatch.hh" + + +/* + * Sanitize + * + * + * === Introduction === + * + * The sanitize machinery is at the core of our zero-cost font loading. We + * mmap() font file into memory and create a blob out of it. Font subtables + * are returned as a readonly sub-blob of the main font blob. These table + * blobs are then sanitized before use, to ensure invalid memory access does + * not happen. The toplevel sanitize API use is like, eg. 
to load the 'head' + * table: + * + * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<head> (face); + * + * The blob then can be converted to a head table struct with: + * + * const head *head_table = head_blob->as<head> (); + * + * What the reference_table does is, to call hb_face_reference_table() to load + * the table blob, sanitize it and return either the sanitized blob, or empty + * blob if sanitization failed. The blob->as() function returns the null + * object of its template type argument if the blob is empty. Otherwise, it + * just casts the blob contents to the desired type. + * + * Sanitizing a blob of data with a type T works as follows (with minor + * simplification): + * + * - Cast blob content to T*, call sanitize() method of it, + * - If sanitize succeeded, return blob. + * - Otherwise, if blob is not writable, try making it writable, + * or copy if cannot be made writable in-place, + * - Call sanitize() again. Return blob if sanitize succeeded. + * - Return empty blob otherwise. + * + * + * === The sanitize() contract === + * + * The sanitize() method of each object type shall return true if it's safe to + * call other methods of the object, and false otherwise. + * + * Note that what sanitize() checks for might align with what the specification + * describes as valid table data, but does not have to be. In particular, we + * do NOT want to be pedantic and concern ourselves with validity checks that + * are irrelevant to our use of the table. On the contrary, we want to be + * lenient with error handling and accept invalid data to the extent that it + * does not impose extra burden on us. + * + * Based on the sanitize contract, one can see that what we check for depends + * on how we use the data in other table methods. Ie. if other table methods + * assume that offsets do NOT point out of the table data block, then that's + * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way).
On + * the other hand, if other methods do such checks themselves, then sanitize() + * does not have to bother with them (glyf/local work this way). The choice + * depends on the table structure and sanitize() performance. For example, to + * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard + * to avoid such costs during font loading. By postponing such checks to the + * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime + * cost to O(used-glyphs). As such, this is preferred. + * + * The same argument can be made re GSUB/GPOS/GDEF, but there, the table + * structure is so complicated that by checking all offsets at sanitize() time, + * we make the code much simpler in other methods, as offsets and referenced + * objects do not need to be validated at each use site. + */ + +/* This limits sanitizing time on really broken fonts. */ +#ifndef HB_SANITIZE_MAX_EDITS +#define HB_SANITIZE_MAX_EDITS 32 +#endif +#ifndef HB_SANITIZE_MAX_OPS_FACTOR +#define HB_SANITIZE_MAX_OPS_FACTOR 8 +#endif +#ifndef HB_SANITIZE_MAX_OPS_MIN +#define HB_SANITIZE_MAX_OPS_MIN 16384 +#endif +#ifndef HB_SANITIZE_MAX_OPS_MAX +#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF +#endif + +struct hb_sanitize_context_t : + hb_dispatch_context_t +{ + hb_sanitize_context_t () : + debug_depth (0), + start (nullptr), end (nullptr), + max_ops (0), + writable (false), edit_count (0), + blob (nullptr), + num_glyphs (65536), + num_glyphs_set (false) {} + + const char *get_name () { return "SANITIZE"; } + template + bool may_dispatch (const T *obj HB_UNUSED, const F *format) + { return format->sanitize (this); } + template + return_t dispatch (const T &obj) { return obj.sanitize (this); } + static return_t default_return_value () { return true; } + static return_t no_dispatch_return_value () { return false; } + bool stop_sublookup_iteration (const return_t r) const { return !r; } + + void init (hb_blob_t *b) + { + this->blob = hb_blob_reference (b); + this->writable 
= false; + } + + void set_num_glyphs (unsigned int num_glyphs_) + { + num_glyphs = num_glyphs_; + num_glyphs_set = true; + } + unsigned int get_num_glyphs () { return num_glyphs; } + + void set_max_ops (int max_ops_) { max_ops = max_ops_; } + + template + void set_object (const T *obj) + { + reset_object (); + + if (!obj) return; + + const char *obj_start = (const char *) obj; + if (unlikely (obj_start < this->start || this->end <= obj_start)) + this->start = this->end = nullptr; + else + { + this->start = obj_start; + this->end = obj_start + MIN (this->end - obj_start, obj->get_size ()); + } + } + + void reset_object () + { + this->start = this->blob->data; + this->end = this->start + this->blob->length; + assert (this->start <= this->end); /* Must not overflow. */ + } + + void start_processing () + { + reset_object (); + this->max_ops = MAX ((unsigned int) (this->end - this->start) * HB_SANITIZE_MAX_OPS_FACTOR, + (unsigned) HB_SANITIZE_MAX_OPS_MIN); + this->edit_count = 0; + this->debug_depth = 0; + + DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, + "start [%p..%p] (%lu bytes)", + this->start, this->end, + (unsigned long) (this->end - this->start)); + } + + void end_processing () + { + DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1, + "end [%p..%p] %u edit requests", + this->start, this->end, this->edit_count); + + hb_blob_destroy (this->blob); + this->blob = nullptr; + this->start = this->end = nullptr; + } + + bool check_range (const void *base, + unsigned int len) const + { + const char *p = (const char *) base; + bool ok = !len || + (this->start <= p && + p <= this->end && + (unsigned int) (this->end - p) >= len && + this->max_ops-- > 0); + + DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, + "check_range [%p..%p]" + " (%d bytes) in [%p..%p] -> %s", + p, p + len, len, + this->start, this->end, + ok ? 
"OK" : "OUT-OF-RANGE"); + + return likely (ok); + } + + template + bool check_range (const T *base, + unsigned int a, + unsigned int b) const + { + return !hb_unsigned_mul_overflows (a, b) && + this->check_range (base, a * b); + } + + template + bool check_range (const T *base, + unsigned int a, + unsigned int b, + unsigned int c) const + { + return !hb_unsigned_mul_overflows (a, b) && + this->check_range (base, a * b, c); + } + + template + bool check_array (const T *base, unsigned int len) const + { + return this->check_range (base, len, hb_static_size (T)); + } + + template + bool check_array (const T *base, + unsigned int a, + unsigned int b) const + { + return this->check_range (base, a, b, hb_static_size (T)); + } + + template + bool check_struct (const Type *obj) const + { return likely (this->check_range (obj, obj->min_size)); } + + bool may_edit (const void *base, unsigned int len) + { + if (this->edit_count >= HB_SANITIZE_MAX_EDITS) + return false; + + const char *p = (const char *) base; + this->edit_count++; + + DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, + "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s", + this->edit_count, + p, p + len, len, + this->start, this->end, + this->writable ? 
"GRANTED" : "DENIED"); + + return this->writable; + } + + template + bool try_set (const Type *obj, const ValueType &v) + { + if (this->may_edit (obj, hb_static_size (Type))) + { + * const_cast (obj) = v; + return true; + } + return false; + } + + template + hb_blob_t *sanitize_blob (hb_blob_t *blob) + { + bool sane; + + init (blob); + + retry: + DEBUG_MSG_FUNC (SANITIZE, start, "start"); + + start_processing (); + + if (unlikely (!start)) + { + end_processing (); + return blob; + } + + Type *t = reinterpret_cast (const_cast (start)); + + sane = t->sanitize (this); + if (sane) + { + if (edit_count) + { + DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count); + + /* sanitize again to ensure no toe-stepping */ + edit_count = 0; + sane = t->sanitize (this); + if (edit_count) { + DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count); + sane = false; + } + } + } + else + { + if (edit_count && !writable) { + start = hb_blob_get_data_writable (blob, nullptr); + end = start + blob->length; + + if (start) + { + writable = true; + /* ok, we made it writable by relocating. try again */ + DEBUG_MSG_FUNC (SANITIZE, start, "retry"); + goto retry; + } + } + } + + end_processing (); + + DEBUG_MSG_FUNC (SANITIZE, start, sane ? 
"PASSED" : "FAILED"); + if (sane) + { + hb_blob_make_immutable (blob); + return blob; + } + else + { + hb_blob_destroy (blob); + return hb_blob_get_empty (); + } + } + + template + hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag) + { + if (!num_glyphs_set) + set_num_glyphs (hb_face_get_glyph_count (face)); + return sanitize_blob (hb_face_reference_table (face, tableTag)); + } + + mutable unsigned int debug_depth; + const char *start, *end; + mutable int max_ops; + private: + bool writable; + unsigned int edit_count; + hb_blob_t *blob; + unsigned int num_glyphs; + bool num_glyphs_set; +}; + +struct hb_sanitize_with_object_t +{ + template + hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c) + { c->set_object (obj); } + ~hb_sanitize_with_object_t () + { c->reset_object (); } + + private: + hb_sanitize_context_t *c; +}; + + +#endif /* HB_SANITIZE_HH */ diff --git a/src/hb-serialize.hh b/src/hb-serialize.hh new file mode 100644 index 000000000..f5e4df521 --- /dev/null +++ b/src/hb-serialize.hh @@ -0,0 +1,196 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SERIALIZE_HH +#define HB_SERIALIZE_HH + +#include "hb.hh" +#include "hb-blob.hh" + + +/* + * Serialize + */ + +struct hb_serialize_context_t +{ + hb_serialize_context_t (void *start_, unsigned int size) + { + this->start = (char *) start_; + this->end = this->start + size; + reset (); + } + + bool in_error () const { return !this->successful; } + + void reset () + { + this->successful = true; + this->head = this->start; + this->debug_depth = 0; + } + + bool propagate_error (bool e) + { return this->successful = this->successful && e; } + template bool propagate_error (const T &obj) + { return this->successful = this->successful && !obj.in_error (); } + template bool propagate_error (const T *obj) + { return this->successful = this->successful && !obj->in_error (); } + template bool propagate_error (T1 &o1, T2 &o2) + { return propagate_error (o1) && propagate_error (o2); } + template bool propagate_error (T1 *o1, T2 *o2) + { return propagate_error (o1) && propagate_error (o2); } + template + bool propagate_error (T1 &o1, T2 &o2, T3 &o3) + { return propagate_error (o1) && propagate_error (o2, o3); } + template + bool propagate_error (T1 *o1, T2 *o2, T3 *o3) + { return propagate_error (o1) && propagate_error (o2, o3); } + + /* To be called around main operation. 
*/ + template + Type *start_serialize () + { + DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1, + "start [%p..%p] (%lu bytes)", + this->start, this->end, + (unsigned long) (this->end - this->start)); + + return start_embed (); + } + void end_serialize () + { + DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1, + "end [%p..%p] serialized %d bytes; %s", + this->start, this->end, + (int) (this->head - this->start), + this->successful ? "successful" : "UNSUCCESSFUL"); + } + + unsigned int length () const { return this->head - this->start; } + + void align (unsigned int alignment) + { + unsigned int l = length () % alignment; + if (l) + allocate_size (alignment - l); + } + + template + Type *start_embed (const Type *_ HB_UNUSED = nullptr) const + { + Type *ret = reinterpret_cast (this->head); + return ret; + } + + template + Type *allocate_size (unsigned int size) + { + if (unlikely (!this->successful || this->end - this->head < ptrdiff_t (size))) { + this->successful = false; + return nullptr; + } + memset (this->head, 0, size); + char *ret = this->head; + this->head += size; + return reinterpret_cast (ret); + } + + template + Type *allocate_min () + { + return this->allocate_size (Type::min_size); + } + + template + Type *embed (const Type &obj) + { + unsigned int size = obj.get_size (); + Type *ret = this->allocate_size (size); + if (unlikely (!ret)) return nullptr; + memcpy (ret, &obj, size); + return ret; + } + template + hb_serialize_context_t &operator << (const Type &obj) { embed (obj); return *this; } + + template + Type *extend_size (Type &obj, unsigned int size) + { + assert (this->start <= (char *) &obj); + assert ((char *) &obj <= this->head); + assert ((char *) &obj + size >= this->head); + if (unlikely (!this->allocate_size (((char *) &obj) + size - this->head))) return nullptr; + return reinterpret_cast (&obj); + } + + template + Type *extend_min (Type &obj) { return extend_size (obj, obj.min_size); } + + template + Type *extend (Type &obj) { return 
extend_size (obj, obj.get_size ()); } + + /* Output routines. */ + template + Type *copy () const + { + assert (this->successful); + unsigned int len = this->head - this->start; + void *p = malloc (len); + if (p) + memcpy (p, this->start, len); + return reinterpret_cast (p); + } + hb_bytes_t copy_bytes () const + { + assert (this->successful); + unsigned int len = this->head - this->start; + void *p = malloc (len); + if (p) + memcpy (p, this->start, len); + else + return hb_bytes_t (); + return hb_bytes_t ((char *) p, len); + } + hb_blob_t *copy_blob () const + { + assert (this->successful); + return hb_blob_create (this->start, + this->head - this->start, + HB_MEMORY_MODE_DUPLICATE, + nullptr, nullptr); + } + + public: + unsigned int debug_depth; + char *start, *end, *head; + bool successful; +}; + + +#endif /* HB_SERIALIZE_HH */