[set] Add add_sorted_array()

Not optimized to use sortedness yet.  Also start putting in place infra
to faster reject bad data.

A version of Chandas.ttf found on some Chrome bots has 660kb of GPOS,
mostly junk.  That is causing 48 million of set->add() calls in
collect_glyphs(), which is insane.

In the upcoming commits, I'll be speeding that up by optimizing
add_sorted_array(), while also reducing work by rejecting out-of-sort
arrays quickly and propagate the rejection.

Part of https://bugs.chromium.org/p/chromium/issues/detail?id=794896
This commit is contained in:
Behdad Esfahbod 2017-12-14 19:33:55 -08:00
parent 9d6511a734
commit 5d02572034
6 changed files with 84 additions and 33 deletions

View File

@ -155,8 +155,9 @@ struct RangeRecord
}
template <typename set_t>
inline void add_coverage (set_t *glyphs) const {
inline bool add_coverage (set_t *glyphs) const {
glyphs->add_range (start, end);
return likely (start <= end);
}
GlyphID start; /* First GlyphID in the range */
@ -715,8 +716,8 @@ struct CoverageFormat1
}
template <typename set_t>
inline void add_coverage (set_t *glyphs) const {
glyphs->add_array (glyphArray.array, glyphArray.len);
inline bool add_coverage (set_t *glyphs) const {
return glyphs->add_sorted_array (glyphArray.array, glyphArray.len);
}
public:
@ -815,10 +816,12 @@ struct CoverageFormat2
}
template <typename set_t>
inline void add_coverage (set_t *glyphs) const {
inline bool add_coverage (set_t *glyphs) const {
unsigned int count = rangeRecord.len;
for (unsigned int i = 0; i < count; i++)
rangeRecord[i].add_coverage (glyphs);
if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
return false;
return true;
}
public:
@ -925,12 +928,14 @@ struct Coverage
}
}
/* Might return false if array looks unsorted.
* Used for faster rejection of corrupt data. */
template <typename set_t>
inline void add_coverage (set_t *glyphs) const {
inline bool add_coverage (set_t *glyphs) const {
switch (u.format) {
case 1: u.format1.add_coverage (glyphs); break;
case 2: u.format2.add_coverage (glyphs); break;
default: break;
case 1: return u.format1.add_coverage (glyphs);
case 2: return u.format2.add_coverage (glyphs);
default:return false;
}
}
@ -1016,11 +1021,15 @@ struct ClassDefFormat1
}
template <typename set_t>
inline void add_class (set_t *glyphs, unsigned int klass) const {
inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
unsigned int count = classValue.len;
for (unsigned int i = 0; i < count; i++)
if (classValue[i] == klass)
{
unsigned int klass = classValue[i];
if (min_klass <= klass && klass <= max_klass)
glyphs->add (startGlyph + i);
}
return true;
}
inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@ -1073,11 +1082,16 @@ struct ClassDefFormat2
}
template <typename set_t>
inline void add_class (set_t *glyphs, unsigned int klass) const {
inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
unsigned int count = rangeRecord.len;
for (unsigned int i = 0; i < count; i++)
if (rangeRecord[i].value == klass)
rangeRecord[i].add_coverage (glyphs);
{
unsigned int klass = rangeRecord[i].value;
if (min_klass <= klass && klass <= max_klass)
if (unlikely (!rangeRecord[i].add_coverage (glyphs)))
return false;
}
return true;
}
inline bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const {
@ -1135,11 +1149,12 @@ struct ClassDef
}
}
inline void add_class (hb_set_t *glyphs, unsigned int klass) const {
template <typename set_t>
inline bool add_class (set_t *glyphs, unsigned int min_klass, unsigned int max_klass) const {
switch (u.format) {
case 1: u.format1.add_class (glyphs, klass); return;
case 2: u.format2.add_class (glyphs, klass); return;
default:return;
case 1: return u.format1.add_class (glyphs, min_klass, max_klass);
case 2: return u.format2.add_class (glyphs, min_klass, max_klass);
default:return false;
}
}

View File

@ -352,7 +352,7 @@ struct GDEF
inline unsigned int get_glyph_class (hb_codepoint_t glyph) const
{ return (this+glyphClassDef).get_class (glyph); }
inline void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const
{ (this+glyphClassDef).add_class (glyphs, klass); }
{ (this+glyphClassDef).add_class (glyphs, klass, klass); }
inline bool has_mark_attachment_types (void) const { return markAttachClassDef != 0; }
inline unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const

View File

@ -758,14 +758,12 @@ struct PairPosFormat2
(this+coverage).add_coverage (c->input);
unsigned int count1 = class1Count;
const ClassDef &klass1 = this+classDef1;
for (unsigned int i = 0; i < count1; i++)
klass1.add_class (c->input, i);
if (count1)
(this+classDef1).add_class (c->input, 0, count1 - 1);
unsigned int count2 = class2Count;
const ClassDef &klass2 = this+classDef2;
for (unsigned int i = 0; i < count2; i++)
klass2.add_class (c->input, i);
if (count2)
(this+classDef2).add_class (c->input, 0, count2 - 1);
}
inline const Coverage &get_coverage (void) const

View File

@ -621,7 +621,7 @@ static inline void collect_glyph (hb_set_t *glyphs, const UINT16 &value, const v
static inline void collect_class (hb_set_t *glyphs, const UINT16 &value, const void *data)
{
const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data);
class_def.add_class (glyphs, value);
class_def.add_class (glyphs, value, value);
}
static inline void collect_coverage (hb_set_t *glyphs, const UINT16 &value, const void *data)
{

View File

@ -80,11 +80,25 @@ struct hb_set_digest_lowest_bits_t
mask |= mb + (mb - ma) - (mb < ma);
}
}
template <typename T>
inline void add_array (const T *array, unsigned int count)
inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
add (array[i]);
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
}
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
return true;
}
inline bool may_have (hb_codepoint_t g) const {
@ -119,10 +133,17 @@ struct hb_set_digest_combiner_t
tail.add_range (a, b);
}
template <typename T>
inline void add_array (const T *array, unsigned int count)
inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_array (array, count);
tail.add_array (array, count);
head.add_array (array, count, stride);
tail.add_array (array, count, stride);
}
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_sorted_array (array, count, stride);
tail.add_sorted_array (array, count, stride);
return true;
}
inline bool may_have (hb_codepoint_t g) const {

View File

@ -259,12 +259,29 @@ struct hb_set_t
page->add_range (major_start (mb), b);
}
}
template <typename T>
inline void add_array (const T *array, unsigned int count)
inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
add (array[i]);
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
}
/* Might return false if array looks unsorted.
* Used for faster rejection of corrupt data. */
template <typename T>
inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
return true;
}
inline void del (hb_codepoint_t g)
{
if (unlikely (in_error)) return;