[simd] Implement 9ary search for RangeRecord
Is good on correctness. Have not measured performance yet. Part of https://github.com/harfbuzz/harfbuzz/issues/566
This commit is contained in:
parent
291d30b1ff
commit
6bba9d876a
|
@ -416,39 +416,47 @@ dump_use_data_SOURCES = dump-use-data.cc hb-ot-shape-complex-use-table.cc
|
||||||
dump_use_data_CPPFLAGS = $(HBCFLAGS)
|
dump_use_data_CPPFLAGS = $(HBCFLAGS)
|
||||||
dump_use_data_LDADD = libharfbuzz.la $(HBLIBS)
|
dump_use_data_LDADD = libharfbuzz.la $(HBLIBS)
|
||||||
|
|
||||||
COMPILED_TESTS = test-algs test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap
|
|
||||||
COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
|
|
||||||
COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
|
|
||||||
check_PROGRAMS += $(COMPILED_TESTS)
|
check_PROGRAMS += $(COMPILED_TESTS)
|
||||||
TESTS += $(COMPILED_TESTS)
|
TESTS += $(COMPILED_TESTS)
|
||||||
|
COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
|
||||||
test_algs_SOURCES = test-algs.cc hb-static.cc
|
COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
|
||||||
|
COMPILED_TESTS_SOURCES = \
|
||||||
|
hb-static.cc \
|
||||||
|
$(NULL)
|
||||||
|
COMPILED_TESTS = \
|
||||||
|
test-algs \
|
||||||
|
test-iter \
|
||||||
|
test-meta \
|
||||||
|
test-number \
|
||||||
|
test-simd \
|
||||||
|
test-ot-tag \
|
||||||
|
test-unicode-ranges \
|
||||||
|
test-bimap \
|
||||||
|
$(NULL)
|
||||||
|
test_algs_SOURCES = test-algs.cc $(COMPILED_TESTS_SOURCES)
|
||||||
test_algs_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
test_algs_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
test_algs_LDADD = $(COMPILED_TESTS_LDADD)
|
test_algs_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_bimap_SOURCES = test-bimap.cc $(COMPILED_TESTS_SOURCES)
|
||||||
test_iter_SOURCES = test-iter.cc hb-static.cc
|
|
||||||
test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
|
||||||
test_iter_LDADD = $(COMPILED_TESTS_LDADD)
|
|
||||||
|
|
||||||
test_meta_SOURCES = test-meta.cc hb-static.cc
|
|
||||||
test_meta_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
|
||||||
test_meta_LDADD = $(COMPILED_TESTS_LDADD)
|
|
||||||
|
|
||||||
test_number_SOURCES = test-number.cc hb-number.cc
|
|
||||||
test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
|
||||||
test_number_LDADD = $(COMPILED_TESTS_LDADD)
|
|
||||||
|
|
||||||
test_ot_tag_SOURCES = hb-ot-tag.cc
|
|
||||||
test_ot_tag_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
|
||||||
test_ot_tag_LDADD = $(COMPILED_TESTS_LDADD)
|
|
||||||
|
|
||||||
test_unicode_ranges_SOURCES = test-unicode-ranges.cc
|
|
||||||
test_unicode_ranges_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
|
||||||
test_unicode_ranges_LDADD = $(COMPILED_TESTS_LDADD)
|
|
||||||
|
|
||||||
test_bimap_SOURCES = test-bimap.cc hb-static.cc
|
|
||||||
test_bimap_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
test_bimap_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
test_bimap_LDADD = $(COMPILED_TESTS_LDADD)
|
test_bimap_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_iter_SOURCES = test-iter.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_iter_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_meta_SOURCES = test-meta.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_meta_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_meta_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_number_SOURCES = test-number.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_number_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_ot_tag_SOURCES = hb-ot-tag.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_ot_tag_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_ot_tag_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_simd_SOURCES = test-simd.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_simd_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_simd_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
test_unicode_ranges_SOURCES = test-unicode-ranges.cc $(COMPILED_TESTS_SOURCES)
|
||||||
|
test_unicode_ranges_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||||
|
test_unicode_ranges_LDADD = $(COMPILED_TESTS_LDADD)
|
||||||
|
|
||||||
TESTS_ENVIRONMENT = \
|
TESTS_ENVIRONMENT = \
|
||||||
srcdir="$(srcdir)" \
|
srcdir="$(srcdir)" \
|
||||||
|
|
|
@ -346,19 +346,17 @@ struct hb_sorted_array_t :
|
||||||
_hb_cmp_method<T, Type>);
|
_hb_cmp_method<T, Type>);
|
||||||
}
|
}
|
||||||
#ifndef HB_NO_SIMD
|
#ifndef HB_NO_SIMD
|
||||||
#if 0
|
|
||||||
template <typename U = Type,
|
template <typename U = Type,
|
||||||
hb_enable_if (hb_is_same (hb_decay<U>, OT::RangeRecord))>
|
hb_enable_if (hb_is_same (hb_decay<U>, OT::RangeRecord))>
|
||||||
bool bsearch_impl (hb_codepoint_t x, unsigned *pos, hb_priority<1>) const
|
bool bsearch_impl (hb_codepoint_t x, unsigned *pos, hb_priority<1>) const
|
||||||
{
|
{
|
||||||
return hb_simd_bsearch_glyphid_range (pos,
|
return hb_simd_ksearch_glyphid_range (pos,
|
||||||
x,
|
x,
|
||||||
this->arrayZ,
|
this->arrayZ,
|
||||||
this->length,
|
this->length,
|
||||||
sizeof (Type));
|
sizeof (Type));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
template <typename T> inline hb_sorted_array_t<T>
|
template <typename T> inline hb_sorted_array_t<T>
|
||||||
hb_sorted_array (T *array, unsigned int length)
|
hb_sorted_array (T *array, unsigned int length)
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "hb.hh"
|
#include "hb.hh"
|
||||||
|
#include "hb-number.hh"
|
||||||
#include "hb-machinery.hh"
|
#include "hb-machinery.hh"
|
||||||
#include "hb-number.hh"
|
#include "hb-number.hh"
|
||||||
#include "hb-number-parser.hh"
|
#include "hb-number-parser.hh"
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#include "hb.hh"
|
#include "hb.hh"
|
||||||
#include "hb-meta.hh"
|
#include "hb-meta.hh"
|
||||||
|
#include "hb-algs.hh"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* = MOTIVATION
|
* = MOTIVATION
|
||||||
|
@ -173,6 +174,12 @@
|
||||||
* might work just fine. Especially since the second one will be fulfilled
|
* might work just fine. Especially since the second one will be fulfilled
|
||||||
* straight from the L1 cachelines.
|
* straight from the L1 cachelines.
|
||||||
*
|
*
|
||||||
|
* Another snag I hit is that AVX2 only has signed comparisons, not unsigned.
|
||||||
|
* So we add a shift to convert uint16_t numbers to int16_t before comparing.
|
||||||
|
*
|
||||||
|
* Also note that _mm256_set_epi32() and family take their arguments from
|
||||||
|
* the highest lane down to the lowest, i.e. the LAST argument lands in
|
||||||
|
* lane 0. The docs are correct; it is just easy to assume the opposite.
|
||||||
*
|
*
|
||||||
* PREFETCH
|
* PREFETCH
|
||||||
*
|
*
|
||||||
|
@ -194,33 +201,81 @@
|
||||||
|
|
||||||
/* TODO: Test -mvzeroupper. */
|
/* TODO: Test -mvzeroupper. */
|
||||||
|
|
||||||
static __m256i x HB_UNUSED;
|
static inline bool
|
||||||
|
hb_simd_ksearch_glyphid_range (unsigned *pos, /* Out */
|
||||||
|
hb_codepoint_t k,
|
||||||
|
const void *base,
|
||||||
|
size_t length,
|
||||||
|
size_t stride)
|
||||||
|
{
|
||||||
|
if (unlikely (k & ~0xFFFF))
|
||||||
|
{
|
||||||
|
*pos = length;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
*pos = 0;
|
||||||
|
|
||||||
|
#define HB_2TIMES(x) (x), (x)
|
||||||
|
#define HB_4TIMES(x) HB_2TIMES(x), HB_2TIMES (x)
|
||||||
|
#define HB_8TIMES(x) HB_4TIMES(x), HB_4TIMES (x)
|
||||||
|
#define HB_16TIMES(x) HB_8TIMES (x), HB_8TIMES (x)
|
||||||
|
|
||||||
|
/* Find deptch of search tree. */
|
||||||
|
static const unsigned steps[] = {1, 9, 81, 729, 6561, 59049};
|
||||||
|
unsigned rank = 1;
|
||||||
|
while (rank < ARRAY_LENGTH (steps) && length >= steps[rank])
|
||||||
|
rank++;
|
||||||
|
|
||||||
|
static const __m256i _1x8 = _mm256_set_epi32 (HB_8TIMES (1));
|
||||||
|
static const __m256i stridex8 = _mm256_set_epi32 (HB_8TIMES (stride));
|
||||||
|
static const __m256i __1x8 = _mm256_set_epi32 (HB_8TIMES (-1));
|
||||||
|
static const __m256i _12345678 = _mm256_set_epi32 (8, 7, 6, 5, 4, 3, 2, 1);
|
||||||
|
static const __m256i __32768x16 = _mm256_set_epi16 (HB_16TIMES (-32768));
|
||||||
|
|
||||||
|
/* Set up key vector. */
|
||||||
|
const __m256i K = _mm256_add_epi16 (_mm256_set_epi16 (HB_16TIMES ((signed) k - 32768)), _1x8);
|
||||||
|
|
||||||
|
while (rank)
|
||||||
|
{
|
||||||
|
unsigned step = steps[--rank];
|
||||||
|
|
||||||
|
/* Load multiple ranges to test against. */
|
||||||
|
const unsigned limit = stride * length;
|
||||||
|
const __m256i limits = _mm256_set_epi32 (HB_8TIMES (limit));
|
||||||
|
const unsigned pitch = stride * step;
|
||||||
|
const __m256i pitches = _mm256_set_epi32 (HB_8TIMES (pitch));
|
||||||
|
const __m256i offsets = _mm256_sub_epi32 (_mm256_mullo_epi32 (pitches, _12345678), stridex8);
|
||||||
|
const __m256i mask = _mm256_cmpgt_epi32 (limits, offsets);
|
||||||
|
|
||||||
|
/* The actual load... */
|
||||||
|
__m256i V = _mm256_mask_i32gather_epi32 (__1x8, (const int *) base, offsets, mask, 1);
|
||||||
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||||
|
V = _mm256_add_epi16 (_mm256_slli_epi16 (V, 8),
|
||||||
|
_mm256_srli_epi16 (V, 8));
|
||||||
|
#endif
|
||||||
|
V = _mm256_add_epi16 (V, __32768x16);
|
||||||
|
|
||||||
|
/* Compare and locate. */
|
||||||
|
unsigned answer = hb_ctz (~_mm256_movemask_epi8 (_mm256_cmpgt_epi16 (K, V))) >> 1;
|
||||||
|
bool found = answer & 1;
|
||||||
|
answer = (answer + 1) >> 1;
|
||||||
|
unsigned move = step * answer;
|
||||||
|
*pos += move;
|
||||||
|
if (found)
|
||||||
|
{
|
||||||
|
*pos -= 1;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
length -= move;
|
||||||
|
base = (const void *) ((const char *) base + stride * move);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#elif !defined(HB_NO_SIMD)
|
#elif !defined(HB_NO_SIMD)
|
||||||
#define HB_NO_SIMD
|
#define HB_NO_SIMD
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Use to implement faster specializations.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef HB_NO_SIMD
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
static inline bool
|
|
||||||
hb_simd_bsearch_glyphid_range (unsigned *pos, /* Out */
|
|
||||||
hb_codepoint_t k,
|
|
||||||
const void *base,
|
|
||||||
size_t length,
|
|
||||||
size_t stride)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* HB_SIMD_HH */
|
#endif /* HB_SIMD_HH */
|
||||||
|
|
|
@ -604,11 +604,11 @@ struct BEInt<Type, 4>
|
||||||
#include "hb-meta.hh"
|
#include "hb-meta.hh"
|
||||||
#include "hb-mutex.hh"
|
#include "hb-mutex.hh"
|
||||||
#include "hb-number.hh"
|
#include "hb-number.hh"
|
||||||
#include "hb-simd.hh" // Requires: hb-meta
|
|
||||||
#include "hb-atomic.hh" // Requires: hb-meta
|
#include "hb-atomic.hh" // Requires: hb-meta
|
||||||
#include "hb-null.hh" // Requires: hb-meta
|
#include "hb-null.hh" // Requires: hb-meta
|
||||||
#include "hb-algs.hh" // Requires: hb-meta hb-null hb-number
|
#include "hb-algs.hh" // Requires: hb-meta hb-null hb-number
|
||||||
#include "hb-iter.hh" // Requires: hb-algs hb-meta
|
#include "hb-iter.hh" // Requires: hb-algs hb-meta
|
||||||
|
#include "hb-simd.hh" // Requires: hb-algs hb-meta
|
||||||
#include "hb-debug.hh" // Requires: hb-algs hb-atomic
|
#include "hb-debug.hh" // Requires: hb-algs hb-atomic
|
||||||
#include "hb-array.hh" // Requires: hb-algs hb-iter hb-null hb-simd
|
#include "hb-array.hh" // Requires: hb-algs hb-iter hb-null hb-simd
|
||||||
#include "hb-vector.hh" // Requires: hb-array hb-null
|
#include "hb-vector.hh" // Requires: hb-array hb-null
|
||||||
|
|
|
@ -24,8 +24,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "hb.hh"
|
#include "hb.hh"
|
||||||
#include "hb-number.hh"
|
#include "hb-number.cc"
|
||||||
#include "hb-number-parser.hh"
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* Copyright © 2019 Facebook, Inc.
|
||||||
|
*
|
||||||
|
* This is part of HarfBuzz, a text shaping library.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
* license or royalty fees, to use, copy, modify, and distribute this
|
||||||
|
* software and its documentation for any purpose, provided that the
|
||||||
|
* above copyright notice and the following two paragraphs appear in
|
||||||
|
* all copies of this software.
|
||||||
|
*
|
||||||
|
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||||
|
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||||
|
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||||
|
* DAMAGE.
|
||||||
|
*
|
||||||
|
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||||
|
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||||
|
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
*
|
||||||
|
* Facebook Author(s): Behdad Esfahbod
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "hb.hh"
|
||||||
|
#include "hb-simd.hh"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* A 16-bit value stored as two bytes, most significant byte first.
 * Implicitly constructible from an integer so arrays of U16 can be
 * written as plain number lists. */
struct U16
{
  U16 (unsigned v_)
    : v {(uint8_t) (v_ >> 8), (uint8_t) (v_ & 0xFF)}
  {}

  uint8_t v[2];
};
|
||||||
|
|
||||||
|
int
|
||||||
|
main (int argc, char **argv)
|
||||||
|
{
|
||||||
|
|
||||||
|
const U16 a[] = {1, 2, 5, 10, 16, 19};
|
||||||
|
|
||||||
|
#define TEST(k, f, p) \
|
||||||
|
{ \
|
||||||
|
unsigned pos = 123456789; \
|
||||||
|
bool found = hb_simd_ksearch_glyphid_range (&pos, \
|
||||||
|
k, \
|
||||||
|
a, \
|
||||||
|
ARRAY_LENGTH (a) / 2, \
|
||||||
|
sizeof (a[0]) * 2); \
|
||||||
|
/*printf ("key %d found %d pos %d\n", k, found, pos);*/ \
|
||||||
|
assert (found == f && pos == p); \
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST (0, false, 0);
|
||||||
|
TEST (1, true , 0);
|
||||||
|
TEST (2, true , 0);
|
||||||
|
TEST (3, false, 1);
|
||||||
|
TEST (4, false, 1);
|
||||||
|
TEST (5, true , 1);
|
||||||
|
TEST (6, true , 1);
|
||||||
|
TEST (7, true , 1);
|
||||||
|
TEST (8, true , 1);
|
||||||
|
TEST (9, true , 1);
|
||||||
|
TEST (10, true , 1);
|
||||||
|
TEST (11, false, 2);
|
||||||
|
TEST (12, false, 2);
|
||||||
|
TEST (13, false, 2);
|
||||||
|
TEST (14, false, 2);
|
||||||
|
TEST (15, false, 2);
|
||||||
|
TEST (16, true , 2);
|
||||||
|
TEST (17, true , 2);
|
||||||
|
TEST (18, true , 2);
|
||||||
|
TEST (19, true , 2);
|
||||||
|
TEST (20, false, 3);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue