harfbuzz/src/hb-set-digest.hh

175 lines
4.7 KiB
C++

/*
* Copyright © 2012 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH
#include "hb.hh"
/*
* The set digests here implement various "filters" that support
* "approximate member query". Conceptually these are like Bloom
* Filter and Quotient Filter, however, much smaller, faster, and
* designed to fit the requirements of our uses for glyph coverage
* queries.
*
* Our filters are highly accurate if the lookup covers fairly local
* set of glyphs, but fully flooded and ineffective if coverage is
* all over the place.
*
* The frozen-set can be used instead of a digest, to trade more
* memory for 100% accuracy, but in practice, that doesn't look like
* an attractive trade-off.
*/
template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
enum { mask_bytes = sizeof (mask_t) };
enum { mask_bits = sizeof (mask_t) * 8 };
enum { num_bits = 0
+ (mask_bytes >= 1 ? 3 : 0)
+ (mask_bytes >= 2 ? 1 : 0)
+ (mask_bytes >= 4 ? 1 : 0)
+ (mask_bytes >= 8 ? 1 : 0)
+ (mask_bytes >= 16? 1 : 0)
+ 0 };
static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
void init () { mask = 0; }
void add (hb_codepoint_t g) { mask |= mask_for (g); }
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
if ((b >> shift) - (a >> shift) >= mask_bits - 1)
mask = (mask_t) -1;
else {
mask_t ma = mask_for (a);
mask_t mb = mask_for (b);
mask |= mb + (mb - ma) - (mb < ma);
}
return true;
}
template <typename T>
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
}
template <typename T>
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
for (unsigned int i = 0; i < count; i++)
{
add (*array);
array = (const T *) (stride + (const char *) array);
}
return true;
}
bool may_have (hb_codepoint_t g) const
{ return !!(mask & mask_for (g)); }
private:
static mask_t mask_for (hb_codepoint_t g)
{ return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
mask_t mask;
};
template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
void init ()
{
head.init ();
tail.init ();
}
void add (hb_codepoint_t g)
{
head.add (g);
tail.add (g);
}
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
head.add_range (a, b);
tail.add_range (a, b);
return true;
}
template <typename T>
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_array (array, count, stride);
tail.add_array (array, count, stride);
}
template <typename T>
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_sorted_array (array, count, stride);
tail.add_sorted_array (array, count, stride);
return true;
}
bool may_have (hb_codepoint_t g) const
{
return head.may_have (g) && tail.may_have (g);
}
private:
head_t head;
tail_t tail;
};
/*
* hb_set_digest_t
*
* This is a combination of digests that performs "best".
* There is not much science to this: it's a result of intuition
* and testing.
*/
typedef hb_set_digest_combiner_t
<
hb_set_digest_lowest_bits_t<unsigned long, 4>,
hb_set_digest_combiner_t
<
hb_set_digest_lowest_bits_t<unsigned long, 0>,
hb_set_digest_lowest_bits_t<unsigned long, 9>
>
> hb_set_digest_t;
#endif /* HB_SET_DIGEST_HH */