harfbuzz/src/hb-array.hh

492 lines
15 KiB
C++
Raw Normal View History

/*
* Copyright © 2018 Google, Inc.
*
* This is part of HarfBuzz, a text shaping library.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and its documentation for any purpose, provided that the
* above copyright notice and the following two paragraphs appear in
* all copies of this software.
*
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*
* Google Author(s): Behdad Esfahbod
*/
#ifndef HB_ARRAY_HH
#define HB_ARRAY_HH
#include "hb.hh"
2019-01-09 18:05:01 +01:00
#include "hb-algs.hh"
2018-12-21 23:35:58 +01:00
#include "hb-iter.hh"
#include "hb-null.hh"
/* Forward declaration: hb_array_t::qsort() further down returns an
 * hb_sorted_array_t<Type>, whose definition only appears after hb_array_t. */
template <typename Type>
struct hb_sorted_array_t;
2021-07-22 20:27:33 +02:00
/* Selects what hb_array_t::lfind() and hb_sorted_array_t::bfind() write to
 * their optional output position when the searched-for item is NOT found.
 * (On a successful search the index of the match is always written.) */
enum hb_not_found_t
{
/* Leave the output position untouched. */
HB_NOT_FOUND_DONT_STORE,
/* Write the caller-supplied `to_store` value. */
HB_NOT_FOUND_STORE,
/* lfind() writes the array length; bfind() writes the position reported by
 * the binary search (presumably the insertion point -- confirm against
 * hb_bsearch_impl). */
HB_NOT_FOUND_STORE_CLOSEST,
};
/*
 * hb_array_t<Type>
 *
 * A pointer (arrayZ) plus an item count (length) describing a run of Type
 * items, which doubles as a random-access iterator over those items.
 * Stepping forward advances arrayZ and shrinks length; backwards_length
 * records how many items have been stepped over so that __rewind__() never
 * moves arrayZ further back than it has previously been advanced.
 *
 * Nothing in this struct frees the underlying storage except an explicit
 * call to fini().
 */
template <typename Type>
struct hb_array_t : hb_iter_with_fallback_t<hb_array_t<Type>, Type&>
{
  /*
   * Constructors.
   */
  hb_array_t () = default;
  hb_array_t (const hb_array_t&) = default;
  ~hb_array_t () = default;
  hb_array_t& operator= (const hb_array_t&) = default;
  hb_array_t& operator= (hb_array_t&&) = default;

  /* Array of length_ items starting at array_. */
  constexpr hb_array_t (Type *array_, unsigned int length_) : arrayZ (array_), length (length_) {}

  /* Array covering a whole C array; the length comes from the array type. */
  template <unsigned int length_>
  constexpr hb_array_t (Type (&array_)[length_]) : hb_array_t (array_, length_) {}

  /* Converting copy / assignment from an hb_array_t of a compatible item
   * type (as decided by hb_is_cr_convertible).  All three members,
   * including backwards_length, are carried over. */
  template <typename U,
            hb_enable_if (hb_is_cr_convertible(U, Type))>
  constexpr hb_array_t (const hb_array_t<U> &o) :
    hb_iter_with_fallback_t<hb_array_t, Type&> (),
    arrayZ (o.arrayZ), length (o.length), backwards_length (o.backwards_length) {}
  template <typename U,
            hb_enable_if (hb_is_cr_convertible(U, Type))>
  hb_array_t& operator = (const hb_array_t<U> &o)
  { arrayZ = o.arrayZ; length = o.length; backwards_length = o.backwards_length; return *this; }

  /*
   * Iterator implementation.
   */
  typedef Type& __item_t__;
  static constexpr bool is_random_access_iterator = true;

  /* Item i, or CrapOrNull (Type) when i is out of range. */
  Type& __item_at__ (unsigned i) const
  {
    if (unlikely (i >= length)) return CrapOrNull (Type);
    return arrayZ[i];
  }
  /* Step forward by n items; n is clamped to the remaining length. */
  void __forward__ (unsigned n)
  {
    if (unlikely (n > length))
      n = length;
    length -= n;
    backwards_length += n;
    arrayZ += n;
  }
  /* Step back by n items; n is clamped to the number of items previously
   * stepped over. */
  void __rewind__ (unsigned n)
  {
    if (unlikely (n > backwards_length))
      n = backwards_length;
    length += n;
    backwards_length -= n;
    arrayZ -= n;
  }
  unsigned __len__ () const { return length; }

  /* Ouch. The operator== compares the contents of the array.  For range-based for loops,
   * it's best if we can just compare arrayZ, though comparing contents is still fast,
   * but also would require that Type has operator==.  As such, we optimize this operator
   * for range-based for loop and just compare arrayZ and length.
   *
   * The above comment is outdated now because we implemented separate begin/end to
   * objects that were using hb_array_t for range-based loop before. */
  bool operator != (const hb_array_t& o) const
  { return this->arrayZ != o.arrayZ || this->length != o.length; }

  /* Faster range-based for loop without bounds-check. */
  Type *begin () const { return arrayZ; }
  Type *end () const { return arrayZ + length; }

  /*
   * Extra operators.
   */
  Type * operator & () const { return arrayZ; }
  /* NOTE(review): the result is built from (arrayZ, length) only, so unlike
   * the converting constructor above it does not carry backwards_length. */
  operator hb_array_t<const Type> () { return hb_array_t<const Type> (arrayZ, length); }
  template <typename T> operator T * () const { return arrayZ; }

  /* Content comparison; defined out of line after the struct. */
  HB_INTERNAL bool operator == (const hb_array_t &o) const;

  /* Folds hb_hash() of every item into a running value: h = h * 31 + hash. */
  uint32_t hash () const
  {
    uint32_t current = 0;
    for (auto &v : *this)
      current = current * 31 + hb_hash (v);
    return current;
  }

  /*
   * Compare, Sort, and Search.
   */

  /* Note: our compare is NOT lexicographic; it also does NOT call Type::cmp.
   *
   * Arrays of different length are ordered by length alone: the result is
   * negative when `a` is shorter than this array and positive when it is
   * longer.  Equal-length arrays are ordered by hb_memcmp() over their bytes.
   * Only the sign of the result is meaningful. */
  int cmp (const hb_array_t &a) const
  {
    if (length != a.length)
      /* Compare instead of subtracting: casting two large unsigned lengths
       * to int and subtracting them could overflow. */
      return a.length < length ? -1 : +1;
    return hb_memcmp (a.arrayZ, arrayZ, get_size ());
  }
  /* Comparator taking two untyped pointers to hb_array_t (the signature
   * qsort-style callers expect); forwards to the member cmp(). */
  HB_INTERNAL static int cmp (const void *pa, const void *pb)
  {
    const hb_array_t *a = (const hb_array_t *) pa;
    const hb_array_t *b = (const hb_array_t *) pb;
    return b->cmp (*a);
  }

  /* Linear search; returns a pointer to the first item for which
   * hb_equal (x, item) holds, or not_found if there is none. */
  template <typename T>
  Type *lsearch (const T &x, Type *not_found = nullptr)
  {
    unsigned i;
    return lfind (x, &i) ? &this->arrayZ[i] : not_found;
  }
  template <typename T>
  const Type *lsearch (const T &x, const Type *not_found = nullptr) const
  {
    unsigned i;
    return lfind (x, &i) ? &this->arrayZ[i] : not_found;
  }
  /* Linear search.  Returns true and stores the index of the first match in
   * *pos (when pos is non-null).  On a miss returns false, and what is
   * written to *pos is selected by not_found:
   *   HB_NOT_FOUND_DONT_STORE     - nothing,
   *   HB_NOT_FOUND_STORE          - to_store,
   *   HB_NOT_FOUND_STORE_CLOSEST  - length. */
  template <typename T>
  bool lfind (const T &x, unsigned *pos = nullptr,
              hb_not_found_t not_found = HB_NOT_FOUND_DONT_STORE,
              unsigned int to_store = (unsigned int) -1) const
  {
    for (unsigned i = 0; i < length; ++i)
      if (hb_equal (x, this->arrayZ[i]))
      {
        if (pos)
          *pos = i;
        return true;
      }

    if (pos)
    {
      switch (not_found)
      {
        case HB_NOT_FOUND_DONT_STORE:
          break;

        case HB_NOT_FOUND_STORE:
          *pos = to_store;
          break;

        case HB_NOT_FOUND_STORE_CLOSEST:
          *pos = length;
          break;
      }
    }
    return false;
  }

  /* Sorts the items in place with hb_qsort() using cmp_ and returns the same
   * range as an hb_sorted_array_t.  Empty arrays are not passed to hb_qsort. */
  hb_sorted_array_t<Type> qsort (int (*cmp_)(const void*, const void*))
  {
    //static_assert (hb_enable_if (hb_is_trivially_copy_assignable(Type)), "");
    if (likely (length))
      hb_qsort (arrayZ, length, this->get_item_size (), cmp_);
    return hb_sorted_array_t<Type> (*this);
  }
  /* Same as above, with Type::cmp as the comparator. */
  hb_sorted_array_t<Type> qsort ()
  {
    //static_assert (hb_enable_if (hb_is_trivially_copy_assignable(Type)), "");
    if (likely (length))
      hb_qsort (arrayZ, length, this->get_item_size (), Type::cmp);
    return hb_sorted_array_t<Type> (*this);
  }

  /*
   * Other methods.
   */

  /* length multiplied by the per-item size. */
  unsigned int get_size () const { return length * this->get_item_size (); }

  /*
   * Reverse the order of items in this array in the range [start, end).
   * Both bounds are clamped to length; ranges of fewer than two items are
   * left alone.
   */
  void reverse (unsigned start = 0, unsigned end = -1)
  {
    start = hb_min (start, length);
    end = hb_min (end, length);

    /* Written as a subtraction so that no `start + 2` can wrap around. */
    if (end < start || end - start < 2)
      return;

    for (unsigned lhs = start, rhs = end - 1; lhs < rhs; lhs++, rhs--)
      hb_swap (arrayZ[rhs], arrayZ[lhs]);
  }

  /* Sub-range starting start_offset items in.  If seg_count is given it is
   * IN/OUT: on entry the wanted item count, on exit the count actually
   * available (which is also the length of the returned array).  A
   * start_offset past the end yields a zero-length array.  The result is
   * built from pointer and count only, so its backwards_length is 0. */
  hb_array_t sub_array (unsigned int start_offset = 0, unsigned int *seg_count = nullptr /* IN/OUT */) const
  {
    if (!start_offset && !seg_count)
      return *this;

    unsigned int count = length;
    if (unlikely (start_offset > count))
      count = 0;
    else
      count -= start_offset;
    if (seg_count)
      count = *seg_count = hb_min (count, *seg_count);
    return hb_array_t (arrayZ + start_offset, count);
  }
  hb_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const
  { return sub_array (start_offset, &seg_count); }

  /* The first `length` items (fewer if the array is shorter). */
  hb_array_t truncate (unsigned length) const { return sub_array (0, length); }

  /* Reinterprets the start of the array as a T.  Only available for
   * one-byte item types.  Returns a pointer to Null (T) when the array holds
   * fewer than hb_min_size (T) bytes. */
  template <typename T,
            unsigned P = sizeof (Type),
            hb_enable_if (P == 1)>
  const T *as () const
  { return length < hb_min_size (T) ? &Null (T) : reinterpret_cast<const T *> (arrayZ); }

  /* True if the `size` bytes starting at p lie entirely inside this array.
   * Only available for one-byte item types. */
  template <typename T,
            unsigned P = sizeof (Type),
            hb_enable_if (P == 1)>
  bool check_range (const T *p, unsigned int size = T::static_size) const
  {
    return arrayZ <= ((const char *) p)
        && ((const char *) p) <= arrayZ + length
        && (unsigned int) (arrayZ + length - (const char *) p) >= size;
  }

  /* Only call if you allocated the underlying array using hb_malloc() or similar.
   *
   * Frees arrayZ and resets the array to the empty state.  backwards_length
   * is cleared as well, so that a later __rewind__() cannot step a null
   * arrayZ backwards.
   * NOTE(review): hb_free() is handed the current arrayZ, so presumably the
   * array must not have been advanced with __forward__() -- confirm. */
  void fini ()
  { hb_free ((void *) arrayZ); arrayZ = nullptr; length = 0; backwards_length = 0; }

  /* Copies the items into the serializer's buffer and returns an array over
   * the copy, or an empty array if the buffer could not be extended.
   *
   * This overload is selected unless the item is both smaller than
   * `long long` and trivially copy-assignable; it assigns item by item. */
  template <typename hb_serialize_context_t,
            typename U = Type,
            hb_enable_if (!(sizeof (U) < sizeof (long long) && hb_is_trivially_copy_assignable(hb_decay<Type>)))>
  hb_array_t copy (hb_serialize_context_t *c) const
  {
    TRACE_SERIALIZE (this);
    auto* out = c->start_embed (arrayZ);
    if (unlikely (!c->extend_size (out, get_size (), false))) return_trace (hb_array_t ());
    for (unsigned i = 0; i < length; i++)
      out[i] = arrayZ[i]; /* TODO: add version that calls c->copy() */
    return_trace (hb_array_t (out, length));
  }

  /* Same contract as above, selected for items that are smaller than
   * `long long` and trivially copy-assignable; copies with one hb_memcpy(). */
  template <typename hb_serialize_context_t,
            typename U = Type,
            hb_enable_if (sizeof (U) < sizeof (long long) && hb_is_trivially_copy_assignable(hb_decay<Type>))>
  hb_array_t copy (hb_serialize_context_t *c) const
  {
    TRACE_SERIALIZE (this);
    auto* out = c->start_embed (arrayZ);
    if (unlikely (!c->extend_size (out, get_size (), false))) return_trace (hb_array_t ());
    hb_memcpy (out, arrayZ, get_size ());
    return_trace (hb_array_t (out, length));
  }

  /* Delegates to the sanitizer's check_array() over the whole array. */
  template <typename hb_sanitize_context_t>
  bool sanitize (hb_sanitize_context_t *c) const
  { return c->check_array (arrayZ, length); }

  /*
   * Members
   */

  public:
  Type *arrayZ = nullptr;            /* First item of the current range. */
  unsigned int length = 0;           /* Number of items from arrayZ onwards. */
  unsigned int backwards_length = 0; /* Items stepped over by __forward__(). */
};
/* Factory for an hb_array_t over `length` items starting at `array`; lets
 * the item type be deduced from the pointer instead of spelled out. */
template <typename T> inline hb_array_t<T>
hb_array (T *array, unsigned int length)
{
  hb_array_t<T> view (array, length);
  return view;
}
/* Factory for an hb_array_t covering a whole C array; both the item type
 * and the item count are deduced from the array argument. */
template <typename T, unsigned int length_> inline hb_array_t<T>
hb_array (T (&array_)[length_])
{
  hb_array_t<T> view (array_);
  return view;
}
/*
 * hb_sorted_array_t<Type>
 *
 * An hb_array_t<Type> that additionally advertises is_sorted_iterator and
 * offers binary search (bsearch / bfind).  Nothing here sorts or verifies
 * the items; the type only records that the caller says they are sorted.
 */
template <typename Type>
struct hb_sorted_array_t :
        hb_array_t<Type>,
        hb_iter_t<hb_sorted_array_t<Type>, Type&>
{
  typedef hb_iter_t<hb_sorted_array_t, Type&> iter_base_t;
  HB_ITER_USING (iter_base_t);
  static constexpr bool is_random_access_iterator = true;
  static constexpr bool is_sorted_iterator = true;

  hb_sorted_array_t () = default;
  hb_sorted_array_t (const hb_sorted_array_t&) = default;
  ~hb_sorted_array_t () = default;
  hb_sorted_array_t& operator= (const hb_sorted_array_t&) = default;
  hb_sorted_array_t& operator= (hb_sorted_array_t&&) = default;

  /* Sorted array of length_ items starting at array_. */
  constexpr hb_sorted_array_t (Type *array_, unsigned int length_) : hb_array_t<Type> (array_, length_) {}

  /* Sorted array covering a whole C array. */
  template <unsigned int length_>
  constexpr hb_sorted_array_t (Type (&array_)[length_]) : hb_array_t<Type> (array_) {}

  /* Converting copy from any hb_array_t of a compatible item type. */
  template <typename U,
            hb_enable_if (hb_is_cr_convertible(U, Type))>
  constexpr hb_sorted_array_t (const hb_array_t<U> &o) :
    hb_array_t<Type> (o),
    hb_iter_t<hb_sorted_array_t, Type&> () {}

  /* Converting assignment from any hb_array_t of a compatible item type.
   *
   * The assignment has to go through a *reference* to the hb_array_t base.
   * Writing `hb_array_t<Type> (*this) = o` instead would build a temporary
   * copy of the base, assign to that temporary, and leave *this unchanged. */
  template <typename U,
            hb_enable_if (hb_is_cr_convertible(U, Type))>
  hb_sorted_array_t& operator = (const hb_array_t<U> &o)
  { static_cast<hb_array_t<Type> &> (*this) = o; return *this; }

  /* Iterator implementation. */

  /* See comment in hb_array_t::operator != */
  bool operator != (const hb_sorted_array_t& o) const
  { return this->arrayZ != o.arrayZ || this->length != o.length; }

  /* Faster range-based for loop without bounds-check. */
  Type *begin () const { return this->arrayZ; }
  Type *end () const { return this->arrayZ + this->length; }

  /* Same as hb_array_t::sub_array(), with the result re-wrapped as a sorted
   * array. */
  hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int *seg_count /* IN/OUT */) const
  { return hb_sorted_array_t (((const hb_array_t<Type> *) (this))->sub_array (start_offset, seg_count)); }
  hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const
  { return sub_array (start_offset, &seg_count); }

  /* The first `length` items (fewer if the array is shorter). */
  hb_sorted_array_t truncate (unsigned length) const { return sub_array (0, length); }

  /* Binary search; returns a pointer to the matching item, or not_found if
   * bfind() reports no match. */
  template <typename T>
  Type *bsearch (const T &x, Type *not_found = nullptr)
  {
    unsigned int i;
    return bfind (x, &i) ? &this->arrayZ[i] : not_found;
  }
  template <typename T>
  const Type *bsearch (const T &x, const Type *not_found = nullptr) const
  {
    unsigned int i;
    return bfind (x, &i) ? &this->arrayZ[i] : not_found;
  }

  /* Binary search.  Returns true and stores the index of the match in *i
   * (when i is non-null).  On a miss returns false, and what is written to
   * *i is selected by not_found:
   *   HB_NOT_FOUND_DONT_STORE     - nothing,
   *   HB_NOT_FOUND_STORE          - to_store,
   *   HB_NOT_FOUND_STORE_CLOSEST  - the position reported by bsearch_impl()
   *                                 (presumably where x would be inserted --
   *                                 confirm against hb_bsearch_impl). */
  template <typename T>
  bool bfind (const T &x, unsigned int *i = nullptr,
              hb_not_found_t not_found = HB_NOT_FOUND_DONT_STORE,
              unsigned int to_store = (unsigned int) -1) const
  {
    unsigned pos;

    if (bsearch_impl (x, &pos))
    {
      if (i)
        *i = pos;
      return true;
    }

    if (i)
    {
      switch (not_found)
      {
        case HB_NOT_FOUND_DONT_STORE:
          break;

        case HB_NOT_FOUND_STORE:
          *i = to_store;
          break;

        case HB_NOT_FOUND_STORE_CLOSEST:
          *i = pos;
          break;
      }
    }
    return false;
  }

  /* Thin wrapper handing the array, its length and item size to
   * hb_bsearch_impl(), with _hb_cmp_method<T, Type, Ts...> as the comparator
   * and any extra arguments ds forwarded to it. */
  template <typename T, typename ...Ts>
  bool bsearch_impl (const T &x, unsigned *pos, Ts... ds) const
  {
    return hb_bsearch_impl (pos,
                            x,
                            this->arrayZ,
                            this->length,
                            sizeof (Type),
                            _hb_cmp_method<T, Type, Ts...>,
                            std::forward<Ts> (ds)...);
  }
};
/* Factory for an hb_sorted_array_t over `length` items starting at `array`,
 * with the item type deduced from the pointer.  The items are taken as-is;
 * nothing is sorted here. */
template <typename T> inline hb_sorted_array_t<T>
hb_sorted_array (T *array, unsigned int length)
{
  hb_sorted_array_t<T> view (array, length);
  return view;
}
/* Factory for an hb_sorted_array_t covering a whole C array; item type and
 * count are deduced from the array argument.  Nothing is sorted here. */
template <typename T, unsigned int length_> inline hb_sorted_array_t<T>
hb_sorted_array (T (&array_)[length_])
{
  hb_sorted_array_t<T> view (array_);
  return view;
}
/* Content equality for the general case: two arrays are equal when they
 * have the same length and no pair of corresponding items compares
 * unequal with the item type's operator !=. */
template <typename T>
inline bool hb_array_t<T>::operator == (const hb_array_t<T> &o) const
{
  const unsigned int count = this->length;
  if (count != o.length)
    return false;

  /* Walk both arrays in step and stop at the first mismatch. */
  unsigned int idx = 0;
  while (idx < count && !(this->arrayZ[idx] != o.arrayZ[idx]))
    idx++;

  return idx == count;
}
/* Content equality specialized for `const char` items: equal lengths and a
 * zero result from a single hb_memcmp() over the items. */
template <>
inline bool hb_array_t<const char>::operator == (const hb_array_t<const char> &o) const
{
  return this->length == o.length
      && hb_memcmp (arrayZ, o.arrayZ, length) == 0;
}
/* Content equality specialized for `const unsigned char` items: equal
 * lengths and a zero result from a single hb_memcmp() over the items. */
template <>
inline bool hb_array_t<const unsigned char>::operator == (const hb_array_t<const unsigned char> &o) const
{
  return this->length == o.length
      && hb_memcmp (arrayZ, o.arrayZ, length) == 0;
}
/* Specialize hash() for byte arrays.
 *
 * Uses the same `h = h * 31 + hb_hash (item)` fold as the generic hash().
 * On optimized GCC (>= 5) / clang builds without HB_NO_PACKED, the input is
 * first consumed four bytes at a time as one uint32_t; whatever is left is
 * then folded in byte by byte.  The resulting value therefore depends on
 * whether that fast path is compiled in. */
template <>
inline uint32_t hb_array_t<const char>::hash () const
{
  uint32_t h = 0;
  unsigned pos = 0;
  const unsigned count = this->length;

#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
    ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__))
  /* The wrapper struct is declared packed, presumably so that the 32-bit
   * read is valid at any byte offset. */
  struct __attribute__((packed)) packed_uint32_t { uint32_t v; };
  while (pos + 4 <= count)
  {
    const packed_uint32_t *word = (const packed_uint32_t *) &this->arrayZ[pos];
    h = h * 31 + hb_hash ((uint32_t) word->v);
    pos += 4;
  }
#endif

  /* Remaining bytes (or all of them when the fast path is compiled out). */
  while (pos < count)
  {
    h = h * 31 + hb_hash (this->arrayZ[pos]);
    pos++;
  }

  return h;
}
/* hash() specialized for `const unsigned char` items.
 *
 * Uses the `h = h * 31 + hb_hash (item)` fold.  On optimized GCC (>= 5) /
 * clang builds without HB_NO_PACKED, the input is first consumed four bytes
 * at a time as one uint32_t; whatever is left is then folded in byte by
 * byte.  The resulting value therefore depends on whether that fast path is
 * compiled in. */
template <>
inline uint32_t hb_array_t<const unsigned char>::hash () const
{
  uint32_t h = 0;
  unsigned pos = 0;
  const unsigned count = this->length;

#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
    ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__))
  /* The wrapper struct is declared packed, presumably so that the 32-bit
   * read is valid at any byte offset. */
  struct __attribute__((packed)) packed_uint32_t { uint32_t v; };
  while (pos + 4 <= count)
  {
    const packed_uint32_t *word = (const packed_uint32_t *) &this->arrayZ[pos];
    h = h * 31 + hb_hash ((uint32_t) word->v);
    pos += 4;
  }
#endif

  /* Remaining bytes (or all of them when the fast path is compiled out). */
  while (pos < count)
  {
    h = h * 31 + hb_hash (this->arrayZ[pos]);
    pos++;
  }

  return h;
}
/* Read-only byte arrays.  These are the two instantiations for which this
 * header provides the hb_memcmp()-based operator== and the specialized
 * hash(). */
using hb_bytes_t = hb_array_t<const char>;
using hb_ubytes_t = hb_array_t<const unsigned char>;
#endif /* HB_ARRAY_HH */