[set] Add add_sorted_array()

Not optimized to use sortedness yet.  Also start putting in place infra
to faster reject bad data.

A version of Chandas.ttf found on some Chrome bots has 660kb of GPOS,
mostly junk.  That is causing 48 million of set->add() calls in
collect_glyphs(), which is insane.

In the upcoming commits, I'll be speeding that up by optimizing
add_sorted_array(), while also reducing work by rejecting out-of-sort
arrays quickly and propagate the rejection.

Part of https://bugs.chromium.org/p/chromium/issues/detail?id=794896
This commit is contained in:
Behdad Esfahbod 2017-12-14 19:33:55 -08:00
parent 9d6511a734
commit 5d02572034
6 changed files with 84 additions and 33 deletions

View File

@ -155,8 +155,9 @@ struct RangeRecord
} }
template <typename set_t> template <typename set_t>
inline void add_coverage (set_t *glyphs) const { inline bool add_coverage (set_t *glyphs) const {
glyphs->add_range (start, end); glyphs->add_range (start, end);
return likely (start <= end);
} }
GlyphID start; /* First GlyphID in the range */ GlyphID start; /* First GlyphID in the range */
@ -715,8 +716,8 @@ struct CoverageFormat1
} }
template <typename set_t> template <typename set_t>
inline void add_coverage (set_t *glyphs) const { inline bool add_coverage (set_t *glyphs) const {
glyphs->add_array (glyphArray.array, glyphArray.len); return glyphs->add_sorted_array (glyphArray.array, glyphArray.len);
} }
public: public:
@ -815,10 +816,12 @@ struct CoverageFormat2
} }
template <typename set_t> template <typename set_t>
inline void add_coverage (set_t *glyphs) const { inline bool add_coverage (set_t *glyphs) const {
unsigned int count = rangeRecord.len; unsigned int count = rangeRecord.len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
rangeRecord[i].add_coverage (glyphs); if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
return false;
return true;
} }
public: public:
@ -925,12 +928,14 @@ struct Coverage
} }
} }
/* Might return false if array looks unsorted.
* Used for faster rejection of corrupt data. */
template <typename set_t> template <typename set_t>
inline void add_coverage (set_t *glyphs) const { inline bool add_coverage (set_t *glyphs) const {
switch (u.format) { switch (u.format) {
case 1: u.format1.add_coverage (glyphs); break; case 1: return u.format1.add_coverage (glyphs);
case 2: u.format2.add_coverage (glyphs); break; case 2: return u.format2.add_coverage (glyphs);
default: break; default:return false;
} }
} }
@ -1016,11 +1021,15 @@ struct ClassDefFormat1
} }
template <typename set_t> template <typename set_t>
inline void add_class (set_t *glyphs, unsigned int klass) const { inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
unsigned int count = classValue.len; unsigned int count = classValue.len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
if (classValue[i] == klass) {
unsigned int klass = classValue[i];
if (min_klass <= klass && klass <= max_klass)
glyphs->add (startGlyph + i); glyphs->add (startGlyph + i);
}
return true;
} }
inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const { inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@ -1073,11 +1082,16 @@ struct ClassDefFormat2
} }
template <typename set_t> template <typename set_t>
inline void add_class (set_t *glyphs, unsigned int klass) const { inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
unsigned int count = rangeRecord.len; unsigned int count = rangeRecord.len;
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
if (rangeRecord[i].value == klass) {
rangeRecord[i].add_coverage (glyphs); unsigned int klass = rangeRecord[i].value;
if (min_klass <= klass && klass <= max_klass)
if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
return false;
}
return true;
} }
inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const { inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@ -1135,11 +1149,12 @@ struct ClassDef
} }
} }
inline void add_class (hb_set_t *glyphs, unsigned int klass) const { template <typename set_t>
inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
switch (u.format) { switch (u.format) {
case 1: u.format1.add_class (glyphs, klass); return; case 1: return u.format1.add_class (glyphs, min_klass, max_klass);
case 2: u.format2.add_class (glyphs, klass); return; case 2: return u.format2.add_class (glyphs, min_klass, max_klass);
default:return; default:return false;
} }
} }

View File

@ -352,7 +352,7 @@ struct GDEF
inline unsigned int get_glyph_class (hb_codepoint_t glyph) const inline unsigned int get_glyph_class (hb_codepoint_t glyph) const
{ return (this+glyphClassDef).get_class (glyph); } { return (this+glyphClassDef).get_class (glyph); }
inline void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const inline void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const
{ (this+glyphClassDef).add_class (glyphs, klass); } { (this+glyphClassDef).add_class (glyphs, klass, klass); }
inline bool has_mark_attachment_types (void) const { return markAttachClassDef != 0; } inline bool has_mark_attachment_types (void) const { return markAttachClassDef != 0; }
inline unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const inline unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const

View File

@ -758,14 +758,12 @@ struct PairPosFormat2
(this+coverage).add_coverage (c->input); (this+coverage).add_coverage (c->input);
unsigned int count1 = class1Count; unsigned int count1 = class1Count;
const ClassDef &klass1 = this+classDef1; if (count1)
for (unsigned int i = 0; i < count1; i++) (this+classDef1).add_class (c->input, 0, count1 - 1);
klass1.add_class (c->input, i);
unsigned int count2 = class2Count; unsigned int count2 = class2Count;
const ClassDef &klass2 = this+classDef2; if (count2)
for (unsigned int i = 0; i < count2; i++) (this+classDef2).add_class (c->input, 0, count2 - 1);
klass2.add_class (c->input, i);
} }
inline const Coverage &get_coverage (void) const inline const Coverage &get_coverage (void) const

View File

@ -621,7 +621,7 @@ static inline void collect_glyph (hb_set_t *glyphs, const UINT16 &value, const v
static inline void collect_class (hb_set_t *glyphs, const UINT16 &value, const void *data) static inline void collect_class (hb_set_t *glyphs, const UINT16 &value, const void *data)
{ {
const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data); const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
class_def.add_class (glyphs, value); class_def.add_class (glyphs, value, value);
} }
static inline void collect_coverage (hb_set_t *glyphs, const UINT16 &value, const void *data) static inline void collect_coverage (hb_set_t *glyphs, const UINT16 &value, const void *data)
{ {

View File

@ -80,11 +80,25 @@ struct hb_set_digest_lowest_bits_t
mask |= mb + (mb - ma) - (mb < ma); mask |= mb + (mb - ma) - (mb < ma);
} }
} }
template <typename T> template <typename T>
inline void add_array (const T *array, unsigned int count) inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{ {
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
add (array[i]); {
add (*array);
array = (const T *) (stride + (const char *) array);
}
}
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
return true;
} }
inline bool may_have (hb_codepoint_t g) const { inline bool may_have (hb_codepoint_t g) const {
@ -119,10 +133,17 @@ struct hb_set_digest_combiner_t
tail.add_range (a, b); tail.add_range (a, b);
} }
template <typename T> template <typename T>
inline void add_array (const T *array, unsigned int count) inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{ {
head.add_array (array, count); head.add_array (array, count, stride);
tail.add_array (array, count); tail.add_array (array, count, stride);
}
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_sorted_array (array, count, stride);
tail.add_sorted_array (array, count, stride);
return true;
} }
inline bool may_have (hb_codepoint_t g) const { inline bool may_have (hb_codepoint_t g) const {

View File

@ -259,12 +259,29 @@ struct hb_set_t
page->add_range (major_start (mb), b); page->add_range (major_start (mb), b);
} }
} }
template <typename T> template <typename T>
inline void add_array (const T *array, unsigned int count) inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{ {
for (unsigned int i = 0; i < count; i++) for (unsigned int i = 0; i < count; i++)
add (array[i]); {
add (*array);
array = (const T *) (stride + (const char *) array);
}
} }
/* Might return false if array looks unsorted.
* Used for faster rejection of corrupt data. */
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
return true;
}
inline void del (hb_codepoint_t g) inline void del (hb_codepoint_t g)
{ {
if (unlikely (in_error)) return; if (unlikely (in_error)) return;