Merge branch 'master' into var-subset

This commit is contained in:
Michiharu Ariza 2019-03-30 18:51:15 -07:00
commit 43725d3632
8 changed files with 701 additions and 565 deletions

View File

@ -32,6 +32,7 @@ HB_BASE_sources = \
hb-cff2-interp-cs.hh \
hb-common.cc \
hb-debug.hh \
hb-dispatch.hh \
hb-face.cc \
hb-face.hh \
hb-font.cc \
@ -131,6 +132,8 @@ HB_BASE_sources = \
hb-ot-var-gvar-table.hh \
hb-ot-var.cc \
hb-ot-vorg-table.hh \
hb-sanitize.hh \
hb-serialize.hh \
hb-set-digest.hh \
hb-set.cc \
hb-set.hh \

View File

@ -34,6 +34,19 @@
static const struct
{
/* Don't know how to set priority of following. Doesn't work right now. */
//template <typename T>
//uint32_t operator () (const T& v) const
//{ return hb_deref_pointer (v).hash (); }
/* Instead, the following ugly solution: */
/* Generic objects: per the hb_enable_if condition below, this overload is
 * meant to participate only when T is neither an integer type (after
 * reference removal) nor a pointer.  The hash is delegated to the object's
 * own hash () method. */
template <typename T,
hb_enable_if (!hb_is_integer (hb_remove_reference (T)) && !hb_is_pointer (T))>
uint32_t operator () (T&& v) const { return v.hash (); }
/* Pointers: hash the pointed-to object.  Passing the pointer itself back to
 * the hasher would select this very overload again and never reach an actual
 * hash computation, so dereference first and re-dispatch through the other
 * operator () overloads of this functor. */
template <typename T>
uint32_t operator () (const T *v) const
{ return operator() (*v); }
template <typename T,
hb_enable_if (hb_is_integer (T))>
uint32_t operator () (T v) const
@ -41,11 +54,6 @@ static const struct
/* Knuth's multiplicative method: */
return (uint32_t) v * 2654435761u;
}
template <typename T>
uint32_t operator () (T *v) const { return hb_hash (*v); }
template <typename T,
hb_enable_if (!hb_is_integer (hb_remove_reference (T)) && !hb_is_pointer (T))>
uint32_t operator () (T&& v) const { return v.hash (); }
} hb_hash HB_UNUSED;
static const struct

50
src/hb-dispatch.hh Normal file
View File

@ -0,0 +1,50 @@
/*
* Copyright © 2007,2008,2009,2010 Red Hat, Inc.
* Copyright © 2012,2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Red Hat Author(s): Behdad Esfahbod
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_DISPATCH_HH
#define HB_DISPATCH_HH
#include "hb.hh"
/*
* Dispatch
*/
/* CRTP-style base for dispatch contexts.  Context is the deriving context
 * type, Return is the type produced by a dispatch, and MaxDebugDepth is
 * re-exported as max_debug_depth. */
template <typename Context, typename Return, unsigned int MaxDebugDepth>
struct hb_dispatch_context_t
{
  typedef Return return_t;
  static constexpr unsigned max_debug_depth = MaxDebugDepth;

  /* Default policy: dispatching is always allowed; both arguments are ignored. */
  template <typename T, typename F>
  bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED)
  {
    return true;
  }

  /* Value reported when no dispatch happens: the derived context's default. */
  static return_t no_dispatch_return_value ()
  {
    return Context::default_return_value ();
  }

  /* Default policy: a result never stops sub-lookup iteration. */
  static bool stop_sublookup_iteration (const return_t r HB_UNUSED)
  {
    return false;
  }
};
#endif /* HB_DISPATCH_HH */

View File

@ -32,8 +32,9 @@
#include "hb.hh"
#include "hb-blob.hh"
#include "hb-array.hh"
#include "hb-vector.hh"
#include "hb-dispatch.hh"
#include "hb-sanitize.hh"
#include "hb-serialize.hh"
/*
@ -143,535 +144,6 @@ static inline Type& StructAfter(TObject &X)
DEFINE_SIZE_ARRAY(size, array)
/*
* Dispatch
*/
template <typename Context, typename Return, unsigned int MaxDebugDepth>
struct hb_dispatch_context_t
{
static constexpr unsigned max_debug_depth = MaxDebugDepth;
typedef Return return_t;
template <typename T, typename F>
bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED) { return true; }
static return_t no_dispatch_return_value () { return Context::default_return_value (); }
static bool stop_sublookup_iteration (const return_t r HB_UNUSED) { return false; }
};
/*
* Sanitize
*
*
* === Introduction ===
*
* The sanitize machinery is at the core of our zero-cost font loading. We
* mmap() font file into memory and create a blob out of it. Font subtables
* are returned as a readonly sub-blob of the main font blob. These table
* blobs are then sanitized before use, to ensure invalid memory access does
* not happen. The toplevel sanitize API use is like, eg. to load the 'head'
* table:
*
* hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
*
* The blob then can be converted to a head table struct with:
*
* const head *head_table = head_blob->as<head> ();
*
* What the reference_table does is, to call hb_face_reference_table() to load
* the table blob, sanitize it and return either the sanitized blob, or empty
* blob if sanitization failed. The blob->as() function returns the null
* object of its template type argument if the blob is empty. Otherwise, it
* just casts the blob contents to the desired type.
*
* Sanitizing a blob of data with a type T works as follows (with minor
* simplification):
*
* - Cast blob content to T*, call sanitize() method of it,
* - If sanitize succeeded, return blob.
* - Otherwise, if blob is not writable, try making it writable,
* or copy if cannot be made writable in-place,
* - Call sanitize() again. Return blob if sanitize succeeded.
* - Return empty blob otherwise.
*
*
* === The sanitize() contract ===
*
* The sanitize() method of each object type shall return true if it's safe to
* call other methods of the object, and false otherwise.
*
* Note that what sanitize() checks for might align with what the specification
* describes as valid table data, but does not have to be. In particular, we
* do NOT want to be pedantic and concern ourselves with validity checks that
* are irrelevant to our use of the table. On the contrary, we want to be
* lenient with error handling and accept invalid data to the extent that it
* does not impose extra burden on us.
*
* Based on the sanitize contract, one can see that what we check for depends
* on how we use the data in other table methods. Ie. if other table methods
* assume that offsets do NOT point out of the table data block, then that's
* something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On
* the other hand, if other methods do such checks themselves, then sanitize()
* does not have to bother with them (glyf/local work this way). The choice
* depends on the table structure and sanitize() performance. For example, to
* check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard
* to avoid such costs during font loading. By postponing such checks to the
* actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
* cost to O(used-glyphs). As such, this is preferred.
*
* The same argument can be made re GSUB/GPOS/GDEF, but there, the table
* structure is so complicated that by checking all offsets at sanitize() time,
* we make the code much simpler in other methods, as offsets and referenced
* objects do not need to be validated at each use site.
*/
/* This limits sanitizing time on really broken fonts. */
#ifndef HB_SANITIZE_MAX_EDITS
#define HB_SANITIZE_MAX_EDITS 32
#endif
#ifndef HB_SANITIZE_MAX_OPS_FACTOR
#define HB_SANITIZE_MAX_OPS_FACTOR 8
#endif
#ifndef HB_SANITIZE_MAX_OPS_MIN
#define HB_SANITIZE_MAX_OPS_MIN 16384
#endif
#ifndef HB_SANITIZE_MAX_OPS_MAX
#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF
#endif
struct hb_sanitize_context_t :
hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE>
{
hb_sanitize_context_t () :
debug_depth (0),
start (nullptr), end (nullptr),
max_ops (0),
writable (false), edit_count (0),
blob (nullptr),
num_glyphs (65536),
num_glyphs_set (false) {}
const char *get_name () { return "SANITIZE"; }
template <typename T, typename F>
bool may_dispatch (const T *obj HB_UNUSED, const F *format)
{ return format->sanitize (this); }
template <typename T>
return_t dispatch (const T &obj) { return obj.sanitize (this); }
static return_t default_return_value () { return true; }
static return_t no_dispatch_return_value () { return false; }
bool stop_sublookup_iteration (const return_t r) const { return !r; }
void init (hb_blob_t *b)
{
this->blob = hb_blob_reference (b);
this->writable = false;
}
void set_num_glyphs (unsigned int num_glyphs_)
{
num_glyphs = num_glyphs_;
num_glyphs_set = true;
}
unsigned int get_num_glyphs () { return num_glyphs; }
void set_max_ops (int max_ops_) { max_ops = max_ops_; }
template <typename T>
void set_object (const T *obj)
{
reset_object ();
if (!obj) return;
const char *obj_start = (const char *) obj;
if (unlikely (obj_start < this->start || this->end <= obj_start))
this->start = this->end = nullptr;
else
{
this->start = obj_start;
this->end = obj_start + MIN<uintptr_t> (this->end - obj_start, obj->get_size ());
}
}
void reset_object ()
{
this->start = this->blob->data;
this->end = this->start + this->blob->length;
assert (this->start <= this->end); /* Must not overflow. */
}
void start_processing ()
{
reset_object ();
this->max_ops = MAX ((unsigned int) (this->end - this->start) * HB_SANITIZE_MAX_OPS_FACTOR,
(unsigned) HB_SANITIZE_MAX_OPS_MIN);
this->edit_count = 0;
this->debug_depth = 0;
DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1,
"start [%p..%p] (%lu bytes)",
this->start, this->end,
(unsigned long) (this->end - this->start));
}
void end_processing ()
{
DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1,
"end [%p..%p] %u edit requests",
this->start, this->end, this->edit_count);
hb_blob_destroy (this->blob);
this->blob = nullptr;
this->start = this->end = nullptr;
}
bool check_range (const void *base,
unsigned int len) const
{
const char *p = (const char *) base;
bool ok = !len ||
(this->start <= p &&
p <= this->end &&
(unsigned int) (this->end - p) >= len &&
this->max_ops-- > 0);
DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
"check_range [%p..%p]"
" (%d bytes) in [%p..%p] -> %s",
p, p + len, len,
this->start, this->end,
ok ? "OK" : "OUT-OF-RANGE");
return likely (ok);
}
template <typename T>
bool check_range (const T *base,
unsigned int a,
unsigned int b) const
{
return !hb_unsigned_mul_overflows (a, b) &&
this->check_range (base, a * b);
}
template <typename T>
bool check_range (const T *base,
unsigned int a,
unsigned int b,
unsigned int c) const
{
return !hb_unsigned_mul_overflows (a, b) &&
this->check_range (base, a * b, c);
}
template <typename T>
bool check_array (const T *base, unsigned int len) const
{
return this->check_range (base, len, hb_static_size (T));
}
template <typename T>
bool check_array (const T *base,
unsigned int a,
unsigned int b) const
{
return this->check_range (base, a, b, hb_static_size (T));
}
template <typename Type>
bool check_struct (const Type *obj) const
{ return likely (this->check_range (obj, obj->min_size)); }
bool may_edit (const void *base, unsigned int len)
{
if (this->edit_count >= HB_SANITIZE_MAX_EDITS)
return false;
const char *p = (const char *) base;
this->edit_count++;
DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
"may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s",
this->edit_count,
p, p + len, len,
this->start, this->end,
this->writable ? "GRANTED" : "DENIED");
return this->writable;
}
template <typename Type, typename ValueType>
bool try_set (const Type *obj, const ValueType &v)
{
if (this->may_edit (obj, hb_static_size (Type)))
{
* const_cast<Type *> (obj) = v;
return true;
}
return false;
}
template <typename Type>
hb_blob_t *sanitize_blob (hb_blob_t *blob)
{
bool sane;
init (blob);
retry:
DEBUG_MSG_FUNC (SANITIZE, start, "start");
start_processing ();
if (unlikely (!start))
{
end_processing ();
return blob;
}
Type *t = CastP<Type> (const_cast<char *> (start));
sane = t->sanitize (this);
if (sane)
{
if (edit_count)
{
DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count);
/* sanitize again to ensure no toe-stepping */
edit_count = 0;
sane = t->sanitize (this);
if (edit_count) {
DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count);
sane = false;
}
}
}
else
{
if (edit_count && !writable) {
start = hb_blob_get_data_writable (blob, nullptr);
end = start + blob->length;
if (start)
{
writable = true;
/* ok, we made it writable by relocating. try again */
DEBUG_MSG_FUNC (SANITIZE, start, "retry");
goto retry;
}
}
}
end_processing ();
DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED");
if (sane)
{
hb_blob_make_immutable (blob);
return blob;
}
else
{
hb_blob_destroy (blob);
return hb_blob_get_empty ();
}
}
template <typename Type>
hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag)
{
if (!num_glyphs_set)
set_num_glyphs (hb_face_get_glyph_count (face));
return sanitize_blob<Type> (hb_face_reference_table (face, tableTag));
}
mutable unsigned int debug_depth;
const char *start, *end;
mutable int max_ops;
private:
bool writable;
unsigned int edit_count;
hb_blob_t *blob;
unsigned int num_glyphs;
bool num_glyphs_set;
};
struct hb_sanitize_with_object_t
{
template <typename T>
hb_sanitize_with_object_t (hb_sanitize_context_t *c,
const T& obj) : c (c)
{ c->set_object (obj); }
~hb_sanitize_with_object_t ()
{ c->reset_object (); }
private:
hb_sanitize_context_t *c;
};
/*
* Serialize
*/
struct hb_serialize_context_t
{
hb_serialize_context_t (void *start_, unsigned int size)
{
this->start = (char *) start_;
this->end = this->start + size;
reset ();
}
bool in_error () const { return !this->successful; }
void reset ()
{
this->successful = true;
this->head = this->start;
this->debug_depth = 0;
}
bool propagate_error (bool e)
{ return this->successful = this->successful && e; }
template <typename T> bool propagate_error (const T &obj)
{ return this->successful = this->successful && !obj.in_error (); }
template <typename T> bool propagate_error (const T *obj)
{ return this->successful = this->successful && !obj->in_error (); }
template <typename T1, typename T2> bool propagate_error (T1 &o1, T2 &o2)
{ return propagate_error (o1) && propagate_error (o2); }
template <typename T1, typename T2> bool propagate_error (T1 *o1, T2 *o2)
{ return propagate_error (o1) && propagate_error (o2); }
template <typename T1, typename T2, typename T3>
bool propagate_error (T1 &o1, T2 &o2, T3 &o3)
{ return propagate_error (o1) && propagate_error (o2, o3); }
template <typename T1, typename T2, typename T3>
bool propagate_error (T1 *o1, T2 *o2, T3 *o3)
{ return propagate_error (o1) && propagate_error (o2, o3); }
/* To be called around main operation. */
template <typename Type>
Type *start_serialize ()
{
DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1,
"start [%p..%p] (%lu bytes)",
this->start, this->end,
(unsigned long) (this->end - this->start));
return start_embed<Type> ();
}
void end_serialize ()
{
DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1,
"end [%p..%p] serialized %d bytes; %s",
this->start, this->end,
(int) (this->head - this->start),
this->successful ? "successful" : "UNSUCCESSFUL");
}
unsigned int length () const { return this->head - this->start; }
void align (unsigned int alignment)
{
unsigned int l = length () % alignment;
if (l)
allocate_size<void> (alignment - l);
}
template <typename Type>
Type *start_embed (const Type *_ HB_UNUSED = nullptr) const
{
Type *ret = reinterpret_cast<Type *> (this->head);
return ret;
}
template <typename Type>
Type *allocate_size (unsigned int size)
{
if (unlikely (!this->successful || this->end - this->head < ptrdiff_t (size))) {
this->successful = false;
return nullptr;
}
memset (this->head, 0, size);
char *ret = this->head;
this->head += size;
return reinterpret_cast<Type *> (ret);
}
template <typename Type>
Type *allocate_min ()
{
return this->allocate_size<Type> (Type::min_size);
}
template <typename Type>
Type *embed (const Type &obj)
{
unsigned int size = obj.get_size ();
Type *ret = this->allocate_size<Type> (size);
if (unlikely (!ret)) return nullptr;
memcpy (ret, &obj, size);
return ret;
}
template <typename Type>
hb_serialize_context_t &operator << (const Type &obj) { embed (obj); return *this; }
template <typename Type>
Type *extend_size (Type &obj, unsigned int size)
{
assert (this->start <= (char *) &obj);
assert ((char *) &obj <= this->head);
assert ((char *) &obj + size >= this->head);
if (unlikely (!this->allocate_size<Type> (((char *) &obj) + size - this->head))) return nullptr;
return reinterpret_cast<Type *> (&obj);
}
template <typename Type>
Type *extend_min (Type &obj) { return extend_size (obj, obj.min_size); }
template <typename Type>
Type *extend (Type &obj) { return extend_size (obj, obj.get_size ()); }
/* Output routines. */
template <typename Type>
Type *copy () const
{
assert (this->successful);
unsigned int len = this->head - this->start;
void *p = malloc (len);
if (p)
memcpy (p, this->start, len);
return reinterpret_cast<Type *> (p);
}
hb_bytes_t copy_bytes () const
{
assert (this->successful);
unsigned int len = this->head - this->start;
void *p = malloc (len);
if (p)
memcpy (p, this->start, len);
else
return hb_bytes_t ();
return hb_bytes_t ((char *) p, len);
}
hb_blob_t *copy_blob () const
{
assert (this->successful);
return hb_blob_create (this->start,
this->head - this->start,
HB_MEMORY_MODE_DUPLICATE,
nullptr, nullptr);
}
public:
unsigned int debug_depth;
char *start, *end, *head;
bool successful;
};
/*
* Big-endian integers.
*/

View File

@ -31,22 +31,30 @@
/*
* hb_map_t
* hb_hashmap_t
*/
struct hb_map_t
template <typename K, typename V,
K kINVALID = hb_is_pointer (K) ? 0 : (K) -1,
V vINVALID = hb_is_pointer (V) ? 0 : (V) -1>
struct hb_hashmap_t
{
HB_NO_COPY_ASSIGN (hb_map_t);
hb_map_t () { init (); }
~hb_map_t () { fini (); }
HB_NO_COPY_ASSIGN (hb_hashmap_t);
hb_hashmap_t () { init (); }
~hb_hashmap_t () { fini (); }
static_assert (hb_is_integer (K) || hb_is_pointer (K), "");
static_assert (hb_is_integer (V) || hb_is_pointer (V), "");
struct item_t
{
hb_codepoint_t key;
hb_codepoint_t value;
K key;
V value;
bool is_unused () const { return key == INVALID; }
bool is_tombstone () const { return key != INVALID && value == INVALID; }
bool operator== (K o) { return hb_deref_pointer (key) == hb_deref_pointer (o); }
bool operator== (const item_t &o) { return *this == o.key; }
bool is_unused () const { return key == kINVALID; }
bool is_tombstone () const { return key != kINVALID && value == vINVALID; }
};
hb_object_header_t header;
@ -110,7 +118,7 @@ struct hb_map_t
/* Insert back old items. */
if (old_items)
for (unsigned int i = 0; i < old_size; i++)
if (old_items[i].key != INVALID && old_items[i].value != INVALID)
if (old_items[i].key != kINVALID && old_items[i].value != vINVALID)
set (old_items[i].key, old_items[i].value);
free (old_items);
@ -118,14 +126,14 @@ struct hb_map_t
return true;
}
void set (hb_codepoint_t key, hb_codepoint_t value)
void set (K key, V value)
{
if (unlikely (!successful)) return;
if (unlikely (key == INVALID)) return;
if (unlikely (key == kINVALID)) return;
if ((occupancy + occupancy / 2) >= mask && !resize ()) return;
unsigned int i = bucket_for (key);
if (value == INVALID && items[i].key != key)
if (value == vINVALID && items[i].key != key)
return; /* Trying to delete non-existent key. */
if (!items[i].is_unused ())
@ -143,24 +151,22 @@ struct hb_map_t
population++;
}
hb_codepoint_t get (hb_codepoint_t key) const
V get (K key) const
{
if (unlikely (!items)) return INVALID;
if (unlikely (!items)) return vINVALID;
unsigned int i = bucket_for (key);
return items[i].key == key ? items[i].value : INVALID;
return items[i] == key ? items[i].value : vINVALID;
}
void del (hb_codepoint_t key) { set (key, INVALID); }
static constexpr hb_codepoint_t INVALID = HB_MAP_VALUE_INVALID;
void del (K key) { set (key, vINVALID); }
/* Has interface. */
static constexpr hb_codepoint_t SENTINEL = INVALID;
typedef hb_codepoint_t value_t;
value_t operator [] (hb_codepoint_t k) const { return get (k); }
bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; }
static constexpr V SENTINEL = vINVALID;
typedef V value_t;
value_t operator [] (K k) const { return get (k); }
bool has (K k) const { return (*this)[k] != SENTINEL; }
/* Projection. */
hb_codepoint_t operator () (hb_codepoint_t k) const { return get (k); }
V operator () (K k) const { return get (k); }
void clear ()
{
@ -174,20 +180,20 @@ struct hb_map_t
protected:
unsigned int bucket_for (hb_codepoint_t key) const
unsigned int bucket_for (K key) const
{
unsigned int i = hb_hash (key) % prime;
unsigned int step = 0;
unsigned int tombstone = INVALID;
unsigned int tombstone = (unsigned) -1;
while (!items[i].is_unused ())
{
if (items[i].key == key)
if (items[i] == key)
return i;
if (tombstone == INVALID && items[i].is_tombstone ())
if (tombstone == (unsigned) -1 && items[i].is_tombstone ())
tombstone = i;
i = (i + ++step) & mask;
}
return tombstone == INVALID ? i : tombstone;
return tombstone == (unsigned) -1 ? i : tombstone;
}
static unsigned int prime_for (unsigned int shift)
@ -242,5 +248,10 @@ struct hb_map_t
}
};
/* hb_map_t: the hb_codepoint_t -> hb_codepoint_t instantiation of
 * hb_hashmap_t, using HB_MAP_VALUE_INVALID as the invalid marker for both
 * keys and values. */
struct hb_map_t : hb_hashmap_t<hb_codepoint_t,
hb_codepoint_t,
HB_MAP_VALUE_INVALID,
HB_MAP_VALUE_INVALID> {};
#endif /* HB_MAP_HH */

View File

@ -63,6 +63,14 @@ template <typename T> struct hb_match_pointer<T *> { typedef T type; enum { valu
#define hb_remove_pointer(T) typename hb_match_pointer<T>::type
#define hb_is_pointer(T) hb_match_pointer<T>::value
/* Function object that yields the pointee for pointer arguments and the
 * argument itself (by value) for everything else. */
static const struct
{
  /* Non-pointer case: hand the value straight back. */
  template <typename T>
  T operator () (T v) const
  {
    return v;
  }

  /* Pointer case: return a reference to the pointed-to object. */
  template <typename T>
  T& operator () (T *v) const
  {
    return *v;
  }
} hb_deref_pointer HB_UNUSED;
/* Void! For when we need a expression-type of void. */
struct hb_void_t { typedef void value; };

388
src/hb-sanitize.hh Normal file
View File

@ -0,0 +1,388 @@
/*
* Copyright © 2007,2008,2009,2010 Red Hat, Inc.
* Copyright © 2012,2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Red Hat Author(s): Behdad Esfahbod
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_SANITIZE_HH
#define HB_SANITIZE_HH
#include "hb.hh"
#include "hb-blob.hh"
#include "hb-dispatch.hh"
/*
* Sanitize
*
*
* === Introduction ===
*
* The sanitize machinery is at the core of our zero-cost font loading. We
* mmap() font file into memory and create a blob out of it. Font subtables
* are returned as a readonly sub-blob of the main font blob. These table
* blobs are then sanitized before use, to ensure invalid memory access does
* not happen. The toplevel sanitize API use is like, eg. to load the 'head'
* table:
*
* hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
*
* The blob then can be converted to a head table struct with:
*
* const head *head_table = head_blob->as<head> ();
*
* What the reference_table does is, to call hb_face_reference_table() to load
* the table blob, sanitize it and return either the sanitized blob, or empty
* blob if sanitization failed. The blob->as() function returns the null
* object of its template type argument if the blob is empty. Otherwise, it
* just casts the blob contents to the desired type.
*
* Sanitizing a blob of data with a type T works as follows (with minor
* simplification):
*
* - Cast blob content to T*, call sanitize() method of it,
* - If sanitize succeeded, return blob.
* - Otherwise, if blob is not writable, try making it writable,
* or copy if cannot be made writable in-place,
* - Call sanitize() again. Return blob if sanitize succeeded.
* - Return empty blob otherwise.
*
*
* === The sanitize() contract ===
*
* The sanitize() method of each object type shall return true if it's safe to
* call other methods of the object, and false otherwise.
*
* Note that what sanitize() checks for might align with what the specification
* describes as valid table data, but does not have to be. In particular, we
* do NOT want to be pedantic and concern ourselves with validity checks that
* are irrelevant to our use of the table. On the contrary, we want to be
* lenient with error handling and accept invalid data to the extent that it
* does not impose extra burden on us.
*
* Based on the sanitize contract, one can see that what we check for depends
* on how we use the data in other table methods. Ie. if other table methods
* assume that offsets do NOT point out of the table data block, then that's
* something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On
* the other hand, if other methods do such checks themselves, then sanitize()
* does not have to bother with them (glyf/loca work this way). The choice
* depends on the table structure and sanitize() performance. For example, to
* check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard
* to avoid such costs during font loading. By postponing such checks to the
* actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
* cost to O(used-glyphs). As such, this is preferred.
*
* The same argument can be made re GSUB/GPOS/GDEF, but there, the table
* structure is so complicated that by checking all offsets at sanitize() time,
* we make the code much simpler in other methods, as offsets and referenced
* objects do not need to be validated at each use site.
*/
/* This limits sanitizing time on really broken fonts. */
#ifndef HB_SANITIZE_MAX_EDITS
#define HB_SANITIZE_MAX_EDITS 32
#endif
#ifndef HB_SANITIZE_MAX_OPS_FACTOR
#define HB_SANITIZE_MAX_OPS_FACTOR 8
#endif
#ifndef HB_SANITIZE_MAX_OPS_MIN
#define HB_SANITIZE_MAX_OPS_MIN 16384
#endif
#ifndef HB_SANITIZE_MAX_OPS_MAX
#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF
#endif
/* Context that drives one sanitize pass over a blob.  It tracks the byte
 * window [start, end) that table code may read, a budget of range checks
 * (max_ops), and the number of in-place edits table code has requested. */
struct hb_sanitize_context_t :
       hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE>
{
  hb_sanitize_context_t () :
        debug_depth (0),
        start (nullptr), end (nullptr),
        max_ops (0),
        writable (false), edit_count (0),
        blob (nullptr),
        num_glyphs (65536),
        num_glyphs_set (false) {}

  const char *get_name () { return "SANITIZE"; }

  /* A sub-object may only be dispatched to if its format field sanitizes. */
  template <typename T, typename F>
  bool may_dispatch (const T *obj HB_UNUSED, const F *format)
  { return format->sanitize (this); }

  /* Dispatching to an object means sanitizing it. */
  template <typename T>
  return_t dispatch (const T &obj) { return obj.sanitize (this); }

  static return_t default_return_value () { return true; }
  static return_t no_dispatch_return_value () { return false; }

  /* Iteration stops at the first failed result. */
  bool stop_sublookup_iteration (const return_t r) const { return !r; }

  /* Takes a reference on the blob to be sanitized; the reference is dropped
   * again in end_processing ().  Edits are disallowed until the blob data
   * has been made writable. */
  void init (hb_blob_t *b)
  {
    this->blob = hb_blob_reference (b);
    this->writable = false;
  }

  void set_num_glyphs (unsigned int num_glyphs_)
  {
    num_glyphs = num_glyphs_;
    num_glyphs_set = true;
  }
  unsigned int get_num_glyphs () { return num_glyphs; }

  void set_max_ops (int max_ops_) { max_ops = max_ops_; }

  /* Narrows the readable window to the given object: it starts at obj and
   * extends obj->get_size () bytes, capped at the end of the blob.  A null
   * obj leaves the whole-blob window in place; an obj that lies outside the
   * blob empties the window so that every subsequent non-empty range check
   * fails. */
  template <typename T>
  void set_object (const T *obj)
  {
    reset_object ();

    if (!obj) return;

    const char *obj_start = (const char *) obj;
    if (unlikely (obj_start < this->start || this->end <= obj_start))
      this->start = this->end = nullptr;
    else
    {
      this->start = obj_start;
      this->end   = obj_start + MIN<uintptr_t> (this->end - obj_start, obj->get_size ());
    }
  }

  /* Restores the readable window to the entire blob. */
  void reset_object ()
  {
    this->start = this->blob->data;
    this->end = this->start + this->blob->length;
    assert (this->start <= this->end); /* Must not overflow. */
  }

  /* Begins a sanitize round: resets the window, the edit counter and the
   * debug depth, and computes the range-check budget as
   * length * HB_SANITIZE_MAX_OPS_FACTOR, clamped to
   * [HB_SANITIZE_MAX_OPS_MIN, HB_SANITIZE_MAX_OPS_MAX]. */
  void start_processing ()
  {
    reset_object ();

    /* max_ops is a signed int.  Without the upper clamp, a large blob makes
     * the unsigned product either wrap around or exceed INT_MAX, which would
     * leave the budget tiny or negative and fail every range check. */
    unsigned int len = (unsigned int) (this->end - this->start);
    if (unlikely (hb_unsigned_mul_overflows (len, HB_SANITIZE_MAX_OPS_FACTOR)))
      this->max_ops = HB_SANITIZE_MAX_OPS_MAX;
    else
      this->max_ops = MIN (MAX (len * HB_SANITIZE_MAX_OPS_FACTOR,
                                (unsigned) HB_SANITIZE_MAX_OPS_MIN),
                           (unsigned) HB_SANITIZE_MAX_OPS_MAX);

    this->edit_count = 0;
    this->debug_depth = 0;

    DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1,
                     "start [%p..%p] (%lu bytes)",
                     this->start, this->end,
                     (unsigned long) (this->end - this->start));
  }

  /* Ends a sanitize round: drops the blob reference taken in init () and
   * clears the window. */
  void end_processing ()
  {
    DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1,
                     "end [%p..%p] %u edit requests",
                     this->start, this->end, this->edit_count);

    hb_blob_destroy (this->blob);
    this->blob = nullptr;
    this->start = this->end = nullptr;
  }

  /* Returns true if [base, base+len) lies inside the current window and the
   * check budget is not exhausted.  A zero-length range is always accepted
   * and costs nothing; otherwise one unit of max_ops is consumed whenever the
   * bounds tests pass. */
  bool check_range (const void *base,
                    unsigned int len) const
  {
    const char *p = (const char *) base;
    bool ok = !len ||
              (this->start <= p &&
               p <= this->end &&
               (unsigned int) (this->end - p) >= len &&
               this->max_ops-- > 0);

    DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
                     "check_range [%p..%p]"
                     " (%d bytes) in [%p..%p] -> %s",
                     p, p + len, len,
                     this->start, this->end,
                     ok ? "OK" : "OUT-OF-RANGE");

    return likely (ok);
  }

  /* Range check for a * b bytes; fails if the product overflows. */
  template <typename T>
  bool check_range (const T *base,
                    unsigned int a,
                    unsigned int b) const
  {
    return !hb_unsigned_mul_overflows (a, b) &&
           this->check_range (base, a * b);
  }

  /* Range check for a * b * c bytes; fails if any product overflows. */
  template <typename T>
  bool check_range (const T *base,
                    unsigned int a,
                    unsigned int b,
                    unsigned int c) const
  {
    return !hb_unsigned_mul_overflows (a, b) &&
           this->check_range (base, a * b, c);
  }

  /* Range check for an array of len elements of static size T. */
  template <typename T>
  bool check_array (const T *base, unsigned int len) const
  {
    return this->check_range (base, len, hb_static_size (T));
  }

  /* Range check for a two-dimensional a x b array of T. */
  template <typename T>
  bool check_array (const T *base,
                    unsigned int a,
                    unsigned int b) const
  {
    return this->check_range (base, a, b, hb_static_size (T));
  }

  /* Range check for the fixed-size (min_size) part of an object. */
  template <typename Type>
  bool check_struct (const Type *obj) const
  { return likely (this->check_range (obj, obj->min_size)); }

  /* Records an edit request and reports whether the data may be modified.
   * Once HB_SANITIZE_MAX_EDITS requests have been counted, further requests
   * are refused without being counted. */
  bool may_edit (const void *base, unsigned int len)
  {
    if (this->edit_count >= HB_SANITIZE_MAX_EDITS)
      return false;

    const char *p = (const char *) base;
    this->edit_count++;

    DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
                     "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s",
                     this->edit_count,
                     p, p + len, len,
                     this->start, this->end,
                     this->writable ? "GRANTED" : "DENIED");

    return this->writable;
  }

  /* Overwrites *obj with v if editing is permitted; returns whether the
   * write happened. */
  template <typename Type, typename ValueType>
  bool try_set (const Type *obj, const ValueType &v)
  {
    if (this->may_edit (obj, hb_static_size (Type)))
    {
      * const_cast<Type *> (obj) = v;
      return true;
    }
    return false;
  }

  /* Sanitizes blob as a Type.  On success the blob is made immutable and
   * returned; on failure the caller's reference is destroyed and the empty
   * blob is returned.  A blob with no data is returned unchanged.  If the
   * first attempt fails after requesting edits on read-only data, the data
   * is made writable and the whole pass is retried. */
  template <typename Type>
  hb_blob_t *sanitize_blob (hb_blob_t *blob)
  {
    bool sane;

    init (blob);

  retry:
    DEBUG_MSG_FUNC (SANITIZE, start, "start");

    start_processing ();

    if (unlikely (!start))
    {
      end_processing ();
      return blob;
    }

    Type *t = reinterpret_cast<Type *> (const_cast<char *> (start));

    sane = t->sanitize (this);
    if (sane)
    {
      if (edit_count)
      {
        DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count);

        /* sanitize again to ensure no toe-stepping */
        edit_count = 0;
        sane = t->sanitize (this);
        if (edit_count) {
          DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count);
          sane = false;
        }
      }
    }
    else
    {
      if (edit_count && !writable) {
        start = hb_blob_get_data_writable (blob, nullptr);
        end = start + blob->length;

        if (start)
        {
          writable = true;
          /* ok, we made it writable by relocating.  try again */
          DEBUG_MSG_FUNC (SANITIZE, start, "retry");
          goto retry;
        }
      }
    }

    end_processing ();

    DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED");
    if (sane)
    {
      hb_blob_make_immutable (blob);
      return blob;
    }
    else
    {
      hb_blob_destroy (blob);
      return hb_blob_get_empty ();
    }
  }

  /* Loads table tableTag from face and sanitizes it as a Type.  The glyph
   * count is taken from the face unless set_num_glyphs () was called. */
  template <typename Type>
  hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag)
  {
    if (!num_glyphs_set)
      set_num_glyphs (hb_face_get_glyph_count (face));
    return sanitize_blob<Type> (hb_face_reference_table (face, tableTag));
  }

  mutable unsigned int debug_depth;
  const char *start, *end;
  mutable int max_ops; /* Remaining range-check budget; decremented by check_range (). */
  private:
  bool writable;
  unsigned int edit_count;
  hb_blob_t *blob;
  unsigned int num_glyphs;
  bool  num_glyphs_set;
};
struct hb_sanitize_with_object_t
{
template <typename T>
hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c)
{ c->set_object (obj); }
~hb_sanitize_with_object_t ()
{ c->reset_object (); }
private:
hb_sanitize_context_t *c;
};
#endif /* HB_SANITIZE_HH */

196
src/hb-serialize.hh Normal file
View File

@ -0,0 +1,196 @@
/*
* Copyright © 2007,2008,2009,2010 Red Hat, Inc.
* Copyright © 2012,2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Red Hat Author(s): Behdad Esfahbod
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_SERIALIZE_HH
#define HB_SERIALIZE_HH
#include "hb.hh"
#include "hb-blob.hh"
/*
* Serialize
*/
/*
 * Bump allocator over a caller-supplied, fixed-size buffer.
 *
 * [start, end) is the buffer; head is the first unused byte.  Space is
 * handed out by allocate_size() and friends, which advance head.  The
 * first failure clears `successful`; after that every allocation returns
 * nullptr until reset() is called.
 */
struct hb_serialize_context_t
{
  /* Sets up the context over the SIZE bytes at START_.  The buffer is not
   * owned by the context. */
  hb_serialize_context_t (void *start_, unsigned int size)
  {
    this->start = (char *) start_;
    this->end = this->start + size;
    reset ();
  }

  /* True once any allocation or propagated error has failed. */
  bool in_error () const { return !this->successful; }

  /* Clears the error state and rewinds head to the start of the buffer. */
  void reset ()
  {
    this->successful = true;
    this->head = this->start;
    this->debug_depth = 0;
  }

  /*
   * propagate_error(): fold the state of other objects into this context.
   * Each overload returns the updated success state.
   */

  /* E is a success flag: passing false puts the context in error. */
  bool propagate_error (bool e)
  { return this->successful = this->successful && e; }
  /* Puts the context in error if OBJ.in_error (). */
  template <typename T> bool propagate_error (const T &obj)
  { return this->successful = this->successful && !obj.in_error (); }
  /* Puts the context in error if OBJ->in_error (). */
  template <typename T> bool propagate_error (const T *obj)
  { return this->successful = this->successful && !obj->in_error (); }
  /* Same, for pointers to non-const objects.  Without this overload a
   * `T *` argument selects the `const T &` overload above (binding the
   * pointer itself is an identity conversion, which beats the
   * qualification conversion to `const T *`) and then fails to compile
   * on `obj.in_error ()`. */
  template <typename T> bool propagate_error (T *obj)
  { return this->successful = this->successful && !obj->in_error (); }
  template <typename T1, typename T2> bool propagate_error (T1 &o1, T2 &o2)
  { return propagate_error (o1) && propagate_error (o2); }
  template <typename T1, typename T2> bool propagate_error (T1 *o1, T2 *o2)
  { return propagate_error (o1) && propagate_error (o2); }
  template <typename T1, typename T2, typename T3>
  bool propagate_error (T1 &o1, T2 &o2, T3 &o3)
  { return propagate_error (o1) && propagate_error (o2, o3); }
  template <typename T1, typename T2, typename T3>
  bool propagate_error (T1 *o1, T2 *o2, T3 *o3)
  { return propagate_error (o1) && propagate_error (o2, o3); }

  /* To be called around main operation. */
  template <typename Type>
  Type *start_serialize ()
  {
    DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1,
		     "start [%p..%p] (%lu bytes)",
		     this->start, this->end,
		     (unsigned long) (this->end - this->start));

    return start_embed<Type> ();
  }
  void end_serialize ()
  {
    DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1,
		     "end [%p..%p] serialized %d bytes; %s",
		     this->start, this->end,
		     (int) (this->head - this->start),
		     this->successful ? "successful" : "UNSUCCESSFUL");
  }

  /* Number of bytes handed out so far. */
  unsigned int length () const { return this->head - this->start; }

  /* Pads with zero bytes until length () is a multiple of ALIGNMENT.
   * ALIGNMENT must be non-zero. */
  void align (unsigned int alignment)
  {
    unsigned int l = length () % alignment;
    if (l)
      allocate_size<void> (alignment - l);
  }

  /* Returns head typed as Type *, without allocating anything.  The
   * optional argument only serves template argument deduction. */
  template <typename Type>
  Type *start_embed (const Type *_ HB_UNUSED = nullptr) const
  {
    Type *ret = reinterpret_cast<Type *> (this->head);
    return ret;
  }

  /* Hands out SIZE zero-filled bytes at head and advances head.
   * Returns nullptr, and puts the context in error, if the context is
   * already in error or fewer than SIZE bytes remain. */
  template <typename Type>
  Type *allocate_size (unsigned int size)
  {
    /* Compare in size_t: head never passes end, so the difference is
     * non-negative, whereas converting SIZE to ptrdiff_t would turn
     * values above PTRDIFF_MAX negative on targets where ptrdiff_t is no
     * wider than unsigned int, letting an oversized request through. */
    if (unlikely (!this->successful || size_t (this->end - this->head) < size)) {
      this->successful = false;
      return nullptr;
    }
    memset (this->head, 0, size);
    char *ret = this->head;
    this->head += size;
    return reinterpret_cast<Type *> (ret);
  }

  /* Allocates Type::min_size bytes. */
  template <typename Type>
  Type *allocate_min ()
  {
    return this->allocate_size<Type> (Type::min_size);
  }

  /* Allocates OBJ.get_size () bytes and copies OBJ into them.
   * Returns the copy, or nullptr if allocation failed. */
  template <typename Type>
  Type *embed (const Type &obj)
  {
    unsigned int size = obj.get_size ();
    Type *ret = this->allocate_size<Type> (size);
    if (unlikely (!ret)) return nullptr;
    memcpy (ret, &obj, size);
    return ret;
  }
  /* Stream-style embed (); the result of the embed is not reported, check
   * in_error () afterwards. */
  template <typename Type>
  hb_serialize_context_t &operator << (const Type &obj) { embed (obj); return *this; }

  /* Grows the allocated region so that OBJ, which must start inside the
   * already-allocated part of the buffer, covers SIZE bytes.  Only the
   * bytes beyond head are newly allocated.
   * Returns &OBJ, or nullptr if allocation failed. */
  template <typename Type>
  Type *extend_size (Type &obj, unsigned int size)
  {
    char *base = (char *) &obj;
    assert (this->start <= base);
    assert (base <= this->head);
    /* Work with the byte count already covered rather than forming
     * base + size, which could point past the buffer (or wrap) for a
     * large SIZE before any room check has run. */
    size_t covered = this->head - base;
    assert (covered <= size);
    if (unlikely (!this->allocate_size<Type> ((unsigned int) (size - covered)))) return nullptr;
    return reinterpret_cast<Type *> (&obj);
  }

  /* extend_size () to OBJ.min_size. */
  template <typename Type>
  Type *extend_min (Type &obj) { return extend_size (obj, obj.min_size); }

  /* extend_size () to OBJ.get_size (). */
  template <typename Type>
  Type *extend (Type &obj) { return extend_size (obj, obj.get_size ()); }

  /* Output routines. */

  /* Returns a malloc ()ed copy of the serialized bytes typed as Type *,
   * or nullptr if malloc () failed.  The caller releases it with free ().
   * Must only be called on a successful context. */
  template <typename Type>
  Type *copy () const
  {
    assert (this->successful);
    unsigned int len = this->head - this->start;
    void *p = malloc (len);
    if (p)
      memcpy (p, this->start, len);
    return reinterpret_cast<Type *> (p);
  }
  /* Like copy (), but returns the malloc ()ed copy together with its
   * length; a default-constructed hb_bytes_t if malloc () failed. */
  hb_bytes_t copy_bytes () const
  {
    assert (this->successful);
    unsigned int len = this->head - this->start;
    void *p = malloc (len);
    if (p)
      memcpy (p, this->start, len);
    else
      return hb_bytes_t ();
    return hb_bytes_t ((char *) p, len);
  }
  /* Returns a blob created over the serialized bytes with
   * HB_MEMORY_MODE_DUPLICATE.  Must only be called on a successful
   * context. */
  hb_blob_t *copy_blob () const
  {
    assert (this->successful);
    return hb_blob_create (this->start,
			   this->head - this->start,
			   HB_MEMORY_MODE_DUPLICATE,
			   nullptr, nullptr);
  }

  public:
  unsigned int debug_depth;	/* Reset to 0 by reset (). */
  char *start, *end, *head;	/* Buffer bounds and first unused byte. */
  bool successful;		/* False once any operation has failed. */
};
#endif /* HB_SERIALIZE_HH */