[set] Add call to export set contents to an array. (#3500)

[set] Add hb_set_next_many.
This commit is contained in:
Andrew John 2022-03-25 08:36:44 -07:00 committed by GitHub
parent a55a42444d
commit 0182988229
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 315 additions and 1 deletions

View File

@ -86,6 +86,72 @@ struct hb_bit_page_t
// Applies v to the range described by a and b: forwards to add_range when v
// is true and to del_range when v is false.
void set_range (hb_codepoint_t a, hb_codepoint_t b, bool v)
{
  if (!v)
  {
    del_range (a, b);
    return;
  }
  add_range (a, b);
}
// Writes out page values to the array p. Returns the number of values
// written. At most size codepoints will be written.
unsigned int write (uint32_t base,
unsigned int start_value,
hb_codepoint_t *p,
unsigned int size) const
{
unsigned int start_v = start_value >> ELT_BITS_LOG_2;
unsigned int start_bit = start_value & ELT_MASK;
unsigned int count = 0;
for (unsigned i = start_v; i < len () && count < size; i++)
{
elt_t bits = v[i];
uint32_t v_base = base | (i << ELT_BITS_LOG_2);
for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++)
{
if ((elt_t(1) << j) & bits) {
*p++ = v_base | j;
count++;
}
}
start_bit = 0;
}
return count;
}
// Writes out the values NOT in this page to the array p. Returns the
// number of values written. At most size codepoints will be written.
// Returns the number of codepoints written. next_value holds the next value
// that should be written (if not present in this page). This is used to fill
// any missing value gaps between this page and the previous page, if any.
// next_value is updated to one more than the last value present in this page.
unsigned int write_inverted (uint32_t base,
unsigned int start_value,
hb_codepoint_t *p,
unsigned int size,
hb_codepoint_t *next_value) const
{
unsigned int start_v = start_value >> ELT_BITS_LOG_2;
unsigned int start_bit = start_value & ELT_MASK;
unsigned int count = 0;
for (unsigned i = start_v; i < len () && count < size; i++)
{
elt_t bits = v[i];
uint32_t v_offset = i << ELT_BITS_LOG_2;
for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++)
{
if ((elt_t(1) << j) & bits)
{
hb_codepoint_t value = base | v_offset | j;
// Emit all the missing values from next_value up to value - 1.
for (hb_codepoint_t k = *next_value; k < value && count < size; k++)
{
*p++ = k;
count++;
}
// Skip over this value;
*next_value = value + 1;
}
}
start_bit = 0;
}
return count;
}
bool is_equal (const hb_bit_page_t &other) const
{
return 0 == hb_memcmp (&v, &other.v, sizeof (v));
@ -181,6 +247,7 @@ struct hb_bit_page_t
// PAGE_BITS must be a power of two (exactly one bit set), so that
// PAGE_BITS - 1 can be used as a low-bit mask.
static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
// log2 of PAGE_BITS; the assertion below pins PAGE_BITS to 1 << 9.
static constexpr unsigned PAGE_BITS_LOG_2 = 9;
static_assert (1 << PAGE_BITS_LOG_2 == PAGE_BITS, "");
// Mask that keeps the low PAGE_BITS_LOG_2 bits of a value, i.e. its offset
// within a page.
static constexpr unsigned PAGE_BITMASK = PAGE_BITS - 1;
// Thin wrapper over hb_ctz (presumably count-trailing-zeros, giving the index
// of the lowest set bit -- confirm against hb_ctz's definition).
static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
// hb_bit_storage (elt) - 1 (presumably the index of the highest set bit --
// confirm against hb_bit_storage's definition).
static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
@ -190,8 +257,8 @@ struct hb_bit_page_t
// Number of bits held by one elt_t.
static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8;
// log2 of ELT_BITS; the assertion below pins ELT_BITS to 1 << 6.
static constexpr unsigned ELT_BITS_LOG_2 = 6;
static_assert (1 << ELT_BITS_LOG_2 == ELT_BITS, "");
// Mask that keeps the low ELT_BITS_LOG_2 bits of a value, i.e. the bit index
// inside a single elt_t.
static constexpr unsigned ELT_MASK = ELT_BITS - 1;
// Number of bits held by the whole vector_t.
static constexpr unsigned BITS = sizeof (vector_t) * 8;
static constexpr unsigned MASK = BITS - 1;
// The storage vector must hold exactly PAGE_BITS bits.
static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");

View File

@ -323,6 +323,14 @@ struct hb_bit_set_invertible_t
return true;
}
// Exports up to size codepoints following codepoint into out, honouring the
// inverted flag: an inverted set delegates to the underlying set's
// set_next_many_inverted, a regular one to its set_next_many.
// Returns whatever count the delegate reports.
unsigned int set_next_many (hb_codepoint_t codepoint,
                            hb_codepoint_t *out,
                            unsigned int size) const
{
  if (inverted)
    return s.set_next_many_inverted (codepoint, out, size);
  return s.set_next_many (codepoint, out, size);
}
static constexpr hb_codepoint_t INVALID = hb_bit_set_t::INVALID;
/*

View File

@ -700,6 +700,99 @@ struct hb_bit_set_t
return true;
}
// Writes the elements of this set that come after codepoint into out, in
// page_map order, stopping after size values.
//
//   codepoint - export resumes strictly after this value; pass INVALID to
//               start from the first element.
//   out       - destination array; must have room for size entries.
//   size      - maximum number of values to write.
//
// Returns the number of values written.
unsigned int set_next_many (hb_codepoint_t codepoint,
                            hb_codepoint_t *out,
                            unsigned int size) const
{
  // By default, start at the first bit of the first page of values.
  unsigned int start_page = 0;
  unsigned int start_page_value = 0;
  if (unlikely (codepoint != INVALID))
  {
    const auto* page_map_array = page_map.arrayZ;
    unsigned int major = get_major (codepoint);
    unsigned int i = last_page_lookup;
    if (unlikely (i >= page_map.length || page_map_array[i].major != major))
    {
      page_map.bfind (major, &i, HB_NOT_FOUND_STORE_CLOSEST);
      if (i >= page_map.length)
        return 0;  // codepoint is greater than our max element.
    }
    start_page = i;
    if (page_map_array[i].major == major)
    {
      // codepoint's own page exists: resume right after it inside that page.
      start_page_value = page_remainder (codepoint + 1);
      if (unlikely (start_page_value == 0))
      {
        // The export-after value was last in the page. Start on next page.
        start_page++;
      }
    }
    // Otherwise there is no page for codepoint's major and i refers to the
    // closest following page (bfind is relied on to store the insertion
    // point, as the "greater than our max element" check above already
    // assumes). Every value in that page is greater than codepoint, so it has
    // to be exported from its first bit: start_page_value stays 0. Applying
    // codepoint's in-page offset to this unrelated page would drop elements.
  }

  unsigned int initial_size = size;
  for (unsigned int i = start_page; i < page_map.length && size; i++)
  {
    uint32_t base = major_start (page_map[i].major);
    unsigned int n = pages[page_map[i].index].write (base, start_page_value, out, size);
    out += n;
    size -= n;
    // Only the first page visited is entered part-way through.
    start_page_value = 0;
  }
  return initial_size - size;
}
// Writes the values that are NOT stored in this set and that come after
// codepoint into out, in increasing order, stopping after size values.
//
//   codepoint - export resumes strictly after this value; pass INVALID to
//               start from 0.
//   out       - destination array; must have room for size entries.
//   size      - maximum number of values to write.
//
// INVALID itself is never emitted. Returns the number of values written.
unsigned int set_next_many_inverted (hb_codepoint_t codepoint,
                                     hb_codepoint_t *out,
                                     unsigned int size) const
{
  unsigned int initial_size = size;
  // By default, start at the first bit of the first page of values.
  unsigned int start_page = 0;
  unsigned int start_page_value = 0;
  if (unlikely (codepoint != INVALID))
  {
    const auto* page_map_array = page_map.arrayZ;
    unsigned int major = get_major (codepoint);
    unsigned int i = last_page_lookup;
    if (unlikely (i >= page_map.length || page_map_array[i].major != major))
    {
      page_map.bfind(major, &i, HB_NOT_FOUND_STORE_CLOSEST);
      if (unlikely (i >= page_map.length))
      {
        // codepoint is greater than our max element, so everything after it
        // (short of INVALID) is absent from the set and gets emitted.
        while (++codepoint != INVALID && size)
        {
          *out++ = codepoint;
          size--;
        }
        return initial_size - size;
      }
    }
    start_page = i;
    if (page_map_array[i].major == major)
    {
      // codepoint's own page exists: resume right after it inside that page.
      start_page_value = page_remainder (codepoint + 1);
      if (unlikely (start_page_value == 0))
      {
        // The export-after value was last in the page. Start on next page.
        start_page++;
      }
    }
    // Otherwise there is no page for codepoint's major and i refers to the
    // closest following page (bfind is relied on to store the insertion
    // point, as the "greater than our max element" branch above already
    // assumes). That page must be scanned from its first bit (start_page_value
    // stays 0); entering it at codepoint's in-page offset would miss present
    // values before that offset and wrongly emit them as absent.
  }

  // First candidate for emission; wraps to 0 when codepoint is INVALID.
  hb_codepoint_t next_value = codepoint + 1;
  for (unsigned int i=start_page; i<page_map.length && size; i++)
  {
    uint32_t base = major_start (page_map[i].major);
    unsigned int n = pages[page_map[i].index].write_inverted (base, start_page_value, out, size, &next_value);
    out += n;
    size -= n;
    // Only the first page visited is entered part-way through.
    start_page_value = 0;
  }
  // Everything after the last present value is absent from the set.
  while (next_value < HB_SET_VALUE_INVALID && size) {
    *out++ = next_value++;
    size--;
  }
  return initial_size - size;
}
// True unless population currently holds the UINT_MAX marker value.
bool has_population () const
{
  return !(population == UINT_MAX);
}
unsigned int get_population () const
{
@ -810,6 +903,7 @@ struct hb_bit_set_t
// Page referenced by the i-th page_map entry (mutable access).
page_t &page_at (unsigned int i)
{
  return pages[page_map[i].index];
}
// Page referenced by the i-th page_map entry (read-only access).
const page_t &page_at (unsigned int i) const
{
  return pages[page_map[i].index];
}
// Page number of g: g with its low PAGE_BITS_LOG_2 bits shifted out.
unsigned int get_major (hb_codepoint_t g) const
{
  return g >> page_t::PAGE_BITS_LOG_2;
}
// Offset of g inside its page: the low bits selected by PAGE_BITMASK.
unsigned int page_remainder (hb_codepoint_t g) const
{
  return g & page_t::PAGE_BITMASK;
}
// First codepoint covered by page number major.
hb_codepoint_t major_start (unsigned int major) const
{
  return major << page_t::PAGE_BITS_LOG_2;
}
};

View File

@ -614,3 +614,28 @@ hb_set_previous_range (const hb_set_t *set,
{
return set->previous_range (first, last);
}
/**
 * hb_set_next_many:
 * @set: A set
 * @codepoint: Output starts with the first element after this one.
 *             Use HB_SET_VALUE_INVALID to start from the beginning.
 * @out: An array of codepoints to write to; it must have room for
 *       at least @size entries.
 * @size: The maximum number of codepoints to write out.
 *
 * Exports a batch of elements of @set into @out, beginning with the
 * first element greater than @codepoint. Writing stops when the set
 * runs out of elements or when @size codepoints have been written,
 * whichever comes first.
 *
 * Return value: the number of values written.
 *
 * Since: REPLACEME
 **/
unsigned int
hb_set_next_many (const hb_set_t *set,
                  hb_codepoint_t  codepoint,
                  hb_codepoint_t *out,
                  unsigned int    size)
{
  const unsigned int written = set->set_next_many (codepoint, out, size);
  return written;
}

View File

@ -185,6 +185,12 @@ hb_set_previous_range (const hb_set_t *set,
hb_codepoint_t *first,
hb_codepoint_t *last);
/* Writes up to size elements of set that follow codepoint into out and
 * returns the number written.
 * Pass HB_SET_VALUE_INVALID in to get started. */
HB_EXTERN unsigned int
hb_set_next_many (const hb_set_t *set,
hb_codepoint_t codepoint,
hb_codepoint_t *out,
unsigned int size);
HB_END_DECLS

View File

@ -109,6 +109,7 @@ struct hb_sparseset_t
typedef bool value_t;
// Indexing yields the result of get (k).
value_t operator [] (hb_codepoint_t k) const
{
  return get (k);
}
// Membership: k is present when indexing does not yield SENTINEL.
bool has (hb_codepoint_t k) const
{
  return (*this)[k] != SENTINEL;
}
/* Predicate. */
bool operator () (hb_codepoint_t k) const
{
  return has (k);
}
@ -138,6 +139,8 @@ struct hb_sparseset_t
{ return s.next_range (first, last); }
// Each member below forwards to the same-named operation on the wrapped
// implementation s and returns its result unchanged.
bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const
{
  return s.previous_range (first, last);
}
unsigned int set_next_many (hb_codepoint_t codepoint,
                            hb_codepoint_t *out,
                            unsigned int size) const
{
  return s.set_next_many(codepoint, out, size);
}
unsigned int get_population () const
{
  return s.get_population ();
}
hb_codepoint_t get_min () const
{
  return s.get_min ();
}

View File

@ -1084,6 +1084,113 @@ test_hb_set_add_sorted_array (void)
hb_set_destroy (set);
}
/* Exports a 700-element set in one call, then again starting after 42. */
static void
test_set_next_many (void)
{
  hb_codepoint_t array[700];
  unsigned int n;
  int i;
  hb_set_t *set = hb_set_create ();

  /* Two disjoint runs: [0, 600) and [6000, 6100). */
  i = 0;
  while (i < 600)
    hb_set_add (set, i++);
  i = 6000;
  while (i < 6100)
    hb_set_add (set, i++);
  g_assert (hb_set_get_population (set) == 700);

  /* Full export from the beginning. */
  n = hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 700);
  g_assert_cmpint(n, ==, 700);
  for (i = 0; i < 600; i++)
    g_assert_cmpint (array[i], ==, i);
  for (i = 0; i < 100; i++)
    g_assert (array[600 + i] == 6000 + i);

  /* Try skipping initial values: clear the buffer, then resume after 42. */
  for (i = 0; i < 700; i++)
    array[i] = 0;
  n = hb_set_next_many (set, 42, array, 700);
  g_assert_cmpint (n, ==, 657);
  g_assert_cmpint (array[0], ==, 43);
  g_assert_cmpint (array[n - 1], ==, 6099);

  hb_set_destroy (set);
}
/* Checks that hb_set_next_many honours the size limit: with room for 9 values
 * it must report 9, fill exactly 9 slots, and leave the 10th untouched. */
static void
test_set_next_many_restricted (void)
{
  hb_set_t *set = hb_set_create ();
  for (int i=0; i<600; i++)
    hb_set_add (set, i);
  for (int i=6000; i<6100; i++)
    hb_set_add (set, i);
  g_assert (hb_set_get_population (set) == 700);

  hb_codepoint_t array[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  unsigned int n = hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 9);
  /* The set holds far more than 9 elements, so the limit must be reached. */
  g_assert_cmpint (n, ==, 9);
  for (int i=0; i<9; i++)
    g_assert_cmpint (array[i], ==, i);
  /* Slot past the limit keeps its initial value. */
  g_assert_cmpint (array[9], ==, 0);

  hb_set_destroy (set);
}
/* Exports from an inverted set: the values added before inversion must be
 * skipped and everything else emitted in increasing order. Also checks the
 * reported counts, which always hit the size limit here because an inverted
 * set has far more members than the buffers can hold. */
static void
test_set_next_many_inverted (void)
{
  hb_set_t *set = hb_set_create ();
  hb_set_add (set, 1);
  hb_set_add (set, 3);
  hb_set_invert (set);

  hb_codepoint_t array[] = {0, 0, 0, 0, 0, 999};

  // Single page.
  unsigned int n = hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 5);
  g_assert_cmpint (n, ==, 5);
  g_assert_cmpint (array[0], ==, 0);
  g_assert_cmpint (array[1], ==, 2);
  g_assert_cmpint (array[2], ==, 4);
  g_assert_cmpint (array[3], ==, 5);
  g_assert_cmpint (array[4], ==, 6);
  // Slot past the limit keeps its initial value.
  g_assert_cmpint (array[5], ==, 999);

  // Multiple pages: un-invert, add 1000, invert again.
  hb_set_invert (set);
  hb_set_add (set, 1000);
  hb_set_invert (set);

  hb_codepoint_t array2[1000];
  n = hb_set_next_many (set, HB_SET_VALUE_INVALID, array2, 1000);
  g_assert_cmpint (n, ==, 1000);
  g_assert_cmpint (array2[0], ==, 0);
  g_assert_cmpint (array2[1], ==, 2);
  g_assert_cmpint (array2[2], ==, 4);
  g_assert_cmpint (array2[3], ==, 5);
  for (int i=4; i<997; i++)
  {
    g_assert_cmpint (array2[i], ==, i + 2);
  }
  g_assert_cmpint (array2[997], ==, 999);
  // Value 1000 skipped.
  g_assert_cmpint (array2[998], ==, 1001);
  g_assert_cmpint (array2[999], ==, 1002);

  hb_set_destroy (set);
}
/* Adds the larger value first and checks that the export is still ascending. */
static void
test_set_next_many_out_of_order_pages (void)
{
  hb_codepoint_t results[2];
  unsigned int result_size;
  hb_set_t *set = hb_set_create ();

  /* Insert in descending order. */
  hb_set_add (set, 1957);
  hb_set_add (set, 69);

  result_size = hb_set_next_many (set, HB_SET_VALUE_INVALID, results, 2);

  g_assert_cmpint (result_size, ==, 2);
  g_assert_cmpint (results[0], ==, 69);
  g_assert_cmpint (results[1], ==, 1957);

  hb_set_destroy (set);
}
int
main (int argc, char **argv)
{
@ -1108,6 +1215,10 @@ main (int argc, char **argv)
hb_test_add (test_set_inverted_operations);
hb_test_add (test_hb_set_add_sorted_array);
hb_test_add (test_set_next_many);
hb_test_add (test_set_next_many_restricted);
hb_test_add (test_set_next_many_inverted);
hb_test_add (test_set_next_many_out_of_order_pages);
return hb_test_run();
}

Binary file not shown.