diff --git a/src/hb-bit-page.hh b/src/hb-bit-page.hh index e0c2058e6..975983665 100644 --- a/src/hb-bit-page.hh +++ b/src/hb-bit-page.hh @@ -86,6 +86,72 @@ struct hb_bit_page_t void set_range (hb_codepoint_t a, hb_codepoint_t b, bool v) { if (v) add_range (a, b); else del_range (a, b); } + + // Writes out page values to the array p. Returns the number of values + // written. At most size codepoints will be written. + unsigned int write (uint32_t base, + unsigned int start_value, + hb_codepoint_t *p, + unsigned int size) const + { + unsigned int start_v = start_value >> ELT_BITS_LOG_2; + unsigned int start_bit = start_value & ELT_MASK; + unsigned int count = 0; + for (unsigned i = start_v; i < len () && count < size; i++) + { + elt_t bits = v[i]; + uint32_t v_base = base | (i << ELT_BITS_LOG_2); + for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++) + { + if ((elt_t(1) << j) & bits) { + *p++ = v_base | j; + count++; + } + } + start_bit = 0; + } + return count; + } + + // Writes out the values NOT in this page to the array p. Returns the + // number of values written. At most size codepoints will be written. + // next_value holds the next value that should be written (if not present in + // this page). This is used to fill any missing value gaps between this page + // and the previous page, if any. next_value is updated to one more than the + // last value present in this page. 
+ unsigned int write_inverted (uint32_t base, + unsigned int start_value, + hb_codepoint_t *p, + unsigned int size, + hb_codepoint_t *next_value) const + { + unsigned int start_v = start_value >> ELT_BITS_LOG_2; + unsigned int start_bit = start_value & ELT_MASK; + unsigned int count = 0; + for (unsigned i = start_v; i < len () && count < size; i++) + { + elt_t bits = v[i]; + uint32_t v_offset = i << ELT_BITS_LOG_2; + for (unsigned int j = start_bit; j < ELT_BITS && count < size; j++) + { + if ((elt_t(1) << j) & bits) + { + hb_codepoint_t value = base | v_offset | j; + // Emit all the missing values from next_value up to value - 1. + for (hb_codepoint_t k = *next_value; k < value && count < size; k++) + { + *p++ = k; + count++; + } + // Skip over this value; + *next_value = value + 1; + } + } + start_bit = 0; + } + return count; + } + bool is_equal (const hb_bit_page_t &other) const { return 0 == hb_memcmp (&v, &other.v, sizeof (v)); @@ -181,6 +247,7 @@ struct hb_bit_page_t static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); static constexpr unsigned PAGE_BITS_LOG_2 = 9; static_assert (1 << PAGE_BITS_LOG_2 == PAGE_BITS, ""); + static constexpr unsigned PAGE_BITMASK = PAGE_BITS - 1; static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); } static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; } @@ -190,8 +257,8 @@ struct hb_bit_page_t static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8; static constexpr unsigned ELT_BITS_LOG_2 = 6; static_assert (1 << ELT_BITS_LOG_2 == ELT_BITS, ""); - static constexpr unsigned ELT_MASK = ELT_BITS - 1; + static constexpr unsigned BITS = sizeof (vector_t) * 8; static constexpr unsigned MASK = BITS - 1; static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, ""); diff --git a/src/hb-bit-set-invertible.hh b/src/hb-bit-set-invertible.hh index 0832b0fc2..8dab90f56 100644 --- a/src/hb-bit-set-invertible.hh +++ b/src/hb-bit-set-invertible.hh @@ -323,6 +323,14 @@ struct 
hb_bit_set_invertible_t return true; } + unsigned int set_next_many (hb_codepoint_t codepoint, + hb_codepoint_t *out, + unsigned int size) const + { + return inverted ? s.set_next_many_inverted(codepoint, out, size) + : s.set_next_many(codepoint, out, size); + } + static constexpr hb_codepoint_t INVALID = hb_bit_set_t::INVALID; /* diff --git a/src/hb-bit-set.hh b/src/hb-bit-set.hh index 84e80885e..0d7ca3f6d 100644 --- a/src/hb-bit-set.hh +++ b/src/hb-bit-set.hh @@ -700,6 +700,99 @@ struct hb_bit_set_t return true; } + unsigned int set_next_many (hb_codepoint_t codepoint, + hb_codepoint_t *out, + unsigned int size) const + { + // By default, start at the first bit of the first page of values. + unsigned int start_page = 0; + unsigned int start_page_value = 0; + if (unlikely (codepoint != INVALID)) + { + const auto* page_map_array = page_map.arrayZ; + unsigned int major = get_major (codepoint); + unsigned int i = last_page_lookup; + if (unlikely (i >= page_map.length || page_map_array[i].major != major)) + { + page_map.bfind (major, &i, HB_NOT_FOUND_STORE_CLOSEST); + if (i >= page_map.length) + return 0; // codepoint is greater than our max element. + } + start_page = i; + start_page_value = page_remainder (codepoint + 1); + if (unlikely (start_page_value == 0)) + { + // The export-after value was last in the page. Start on next page. + start_page++; + start_page_value = 0; + } + } + + unsigned int initial_size = size; + for (unsigned int i = start_page; i < page_map.length && size; i++) + { + uint32_t base = major_start (page_map[i].major); + unsigned int n = pages[page_map[i].index].write (base, start_page_value, out, size); + out += n; + size -= n; + start_page_value = 0; + } + return initial_size - size; + } + + unsigned int set_next_many_inverted (hb_codepoint_t codepoint, + hb_codepoint_t *out, + unsigned int size) const + { + unsigned int initial_size = size; + // By default, start at the first bit of the first page of values. 
+ unsigned int start_page = 0; + unsigned int start_page_value = 0; + if (unlikely (codepoint != INVALID)) + { + const auto* page_map_array = page_map.arrayZ; + unsigned int major = get_major (codepoint); + unsigned int i = last_page_lookup; + if (unlikely (i >= page_map.length || page_map_array[i].major != major)) + { + page_map.bfind(major, &i, HB_NOT_FOUND_STORE_CLOSEST); + if (unlikely (i >= page_map.length)) + { + // codepoint is greater than our max element. + while (++codepoint != INVALID && size) + { + *out++ = codepoint; + size--; + } + return initial_size - size; + } + } + start_page = i; + start_page_value = page_remainder (codepoint + 1); + if (unlikely (start_page_value == 0)) + { + // The export-after value was last in the page. Start on next page. + start_page++; + start_page_value = 0; + } + } + + hb_codepoint_t next_value = codepoint + 1; + for (unsigned int i=start_page; i> page_t::PAGE_BITS_LOG_2; } + unsigned int page_remainder (hb_codepoint_t g) const { return g & page_t::PAGE_BITMASK; } hb_codepoint_t major_start (unsigned int major) const { return major << page_t::PAGE_BITS_LOG_2; } }; diff --git a/src/hb-set.cc b/src/hb-set.cc index 79fb9144e..3146d6ffb 100644 --- a/src/hb-set.cc +++ b/src/hb-set.cc @@ -614,3 +614,28 @@ hb_set_previous_range (const hb_set_t *set, { return set->previous_range (first, last); } + +/** + * hb_set_next_many: + * @set: A set + * @codepoint: Outputting codepoints starting after this one. + * Use HB_SET_VALUE_INVALID to get started. + * @out: An array of codepoints to write to. + * @size: The maximum number of codepoints to write out. + * + * Finds the next element in @set that is greater than @codepoint. Writes out + * codepoints to @out, until either the set runs out of elements, or @size + * codepoints are written, whichever comes first. + * + * Return value: the number of values written. 
+ * + * Since: REPLACEME + **/ +unsigned int +hb_set_next_many (const hb_set_t *set, + hb_codepoint_t codepoint, + hb_codepoint_t *out, + unsigned int size) +{ + return set->set_next_many (codepoint, out, size); +} diff --git a/src/hb-set.h b/src/hb-set.h index adf34fb6a..10ce7c10d 100644 --- a/src/hb-set.h +++ b/src/hb-set.h @@ -185,6 +185,12 @@ hb_set_previous_range (const hb_set_t *set, hb_codepoint_t *first, hb_codepoint_t *last); +/* Pass HB_SET_VALUE_INVALID in to get started. */ +HB_EXTERN unsigned int +hb_set_next_many (const hb_set_t *set, + hb_codepoint_t codepoint, + hb_codepoint_t *out, + unsigned int size); HB_END_DECLS diff --git a/src/hb-set.hh b/src/hb-set.hh index af02e9e12..5668b4ad0 100644 --- a/src/hb-set.hh +++ b/src/hb-set.hh @@ -109,6 +109,7 @@ struct hb_sparseset_t typedef bool value_t; value_t operator [] (hb_codepoint_t k) const { return get (k); } bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; } + /* Predicate. */ bool operator () (hb_codepoint_t k) const { return has (k); } @@ -138,6 +139,8 @@ struct hb_sparseset_t { return s.next_range (first, last); } bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const { return s.previous_range (first, last); } + unsigned int set_next_many (hb_codepoint_t codepoint, hb_codepoint_t *out, unsigned int size) const + { return s.set_next_many(codepoint, out, size); } unsigned int get_population () const { return s.get_population (); } hb_codepoint_t get_min () const { return s.get_min (); } diff --git a/test/api/test-set.c b/test/api/test-set.c index 4ac95d9bb..8b3fa038b 100644 --- a/test/api/test-set.c +++ b/test/api/test-set.c @@ -1084,6 +1084,113 @@ test_hb_set_add_sorted_array (void) hb_set_destroy (set); } +static void +test_set_next_many (void) +{ + hb_set_t *set = hb_set_create (); + for (int i=0; i<600; i++) + hb_set_add (set, i); + for (int i=6000; i<6100; i++) + hb_set_add (set, i); + g_assert (hb_set_get_population (set) == 700); + hb_codepoint_t 
array[700]; + + unsigned int n = hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 700); + + g_assert_cmpint(n, ==, 700); + for (int i=0; i<600; i++) + g_assert_cmpint (array[i], ==, i); + for (int i=0; i<100; i++) + g_assert (array[600 + i] == 6000 + i); + + // Try skipping initial values. + for (int i = 0; i < 700; i++) + array[i] = 0; + + n = hb_set_next_many (set, 42, array, 700); + + g_assert_cmpint (n, ==, 657); + g_assert_cmpint (array[0], ==, 43); + g_assert_cmpint (array[n - 1], ==, 6099); + + hb_set_destroy (set); +} + +static void +test_set_next_many_restricted (void) +{ + hb_set_t *set = hb_set_create (); + for (int i=0; i<600; i++) + hb_set_add (set, i); + for (int i=6000; i<6100; i++) + hb_set_add (set, i); + g_assert (hb_set_get_population (set) == 700); + hb_codepoint_t array[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 9); + + for (int i=0; i<9; i++) + g_assert_cmpint (array[i], ==, i); + g_assert_cmpint (array[9], ==, 0); + hb_set_destroy (set); +} + +static void +test_set_next_many_inverted (void) +{ + hb_set_t *set = hb_set_create (); + hb_set_add (set, 1); + hb_set_add (set, 3); + hb_set_invert (set); + + hb_codepoint_t array[] = {0, 0, 0, 0, 0, 999}; + + // Single page. + hb_set_next_many (set, HB_SET_VALUE_INVALID, array, 5); + + g_assert_cmpint (array[0], ==, 0); + g_assert_cmpint (array[1], ==, 2); + g_assert_cmpint (array[2], ==, 4); + g_assert_cmpint (array[3], ==, 5); + g_assert_cmpint (array[4], ==, 6); + g_assert_cmpint (array[5], ==, 999); + + // Multiple pages. 
+ hb_set_invert (set); + hb_set_add (set, 1000); + hb_set_invert (set); + + hb_codepoint_t array2[1000]; + hb_set_next_many (set, HB_SET_VALUE_INVALID, array2, 1000); + g_assert_cmpint (array2[0], ==, 0); + g_assert_cmpint (array2[1], ==, 2); + g_assert_cmpint (array2[2], ==, 4); + g_assert_cmpint (array2[3], ==, 5); + for (int i=4; i<997; i++) + { + g_assert_cmpint (array2[i], ==, i + 2); + } + g_assert_cmpint (array2[997], ==, 999); + // Value 1000 skipped. + g_assert_cmpint (array2[998], ==, 1001); + g_assert_cmpint (array2[999], ==, 1002); + + hb_set_destroy (set); +} + +static void +test_set_next_many_out_of_order_pages (void) { + hb_set_t* set = hb_set_create(); + hb_set_add(set, 1957); + hb_set_add(set, 69); + hb_codepoint_t results[2]; + unsigned int result_size = hb_set_next_many(set, HB_SET_VALUE_INVALID, results, 2); + g_assert_cmpint(result_size, == , 2); + g_assert_cmpint(results[0], == , 69); + g_assert_cmpint(results[1], == , 1957); + hb_set_destroy(set); +} + int main (int argc, char **argv) { @@ -1108,6 +1215,10 @@ main (int argc, char **argv) hb_test_add (test_set_inverted_operations); hb_test_add (test_hb_set_add_sorted_array); + hb_test_add (test_set_next_many); + hb_test_add (test_set_next_many_restricted); + hb_test_add (test_set_next_many_inverted); + hb_test_add (test_set_next_many_out_of_order_pages); return hb_test_run(); } diff --git a/test/subset/__pycache__/repack_test.cpython-39.pyc b/test/subset/__pycache__/repack_test.cpython-39.pyc new file mode 100644 index 000000000..944f342ab Binary files /dev/null and b/test/subset/__pycache__/repack_test.cpython-39.pyc differ diff --git a/test/subset/__pycache__/subset_test_suite.cpython-39.pyc b/test/subset/__pycache__/subset_test_suite.cpython-39.pyc new file mode 100644 index 000000000..0aae66909 Binary files /dev/null and b/test/subset/__pycache__/subset_test_suite.cpython-39.pyc differ