[simd] Implement 9ary search for RangeRecord
Correctness looks good; performance has not been measured yet. Part of https://github.com/harfbuzz/harfbuzz/issues/566
This commit is contained in:
parent
291d30b1ff
commit
6bba9d876a
|
@ -416,39 +416,47 @@ dump_use_data_SOURCES = dump-use-data.cc hb-ot-shape-complex-use-table.cc
|
|||
dump_use_data_CPPFLAGS = $(HBCFLAGS)
|
||||
dump_use_data_LDADD = libharfbuzz.la $(HBLIBS)
|
||||
|
||||
COMPILED_TESTS = test-algs test-iter test-meta test-number test-ot-tag test-unicode-ranges test-bimap
|
||||
COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
|
||||
COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
|
||||
check_PROGRAMS += $(COMPILED_TESTS)
|
||||
TESTS += $(COMPILED_TESTS)
|
||||
|
||||
test_algs_SOURCES = test-algs.cc hb-static.cc
|
||||
COMPILED_TESTS_CPPFLAGS = $(HBCFLAGS) -DMAIN -UNDEBUG
|
||||
COMPILED_TESTS_LDADD = libharfbuzz.la $(HBLIBS)
|
||||
COMPILED_TESTS_SOURCES = \
|
||||
hb-static.cc \
|
||||
$(NULL)
|
||||
COMPILED_TESTS = \
|
||||
test-algs \
|
||||
test-iter \
|
||||
test-meta \
|
||||
test-number \
|
||||
test-simd \
|
||||
test-ot-tag \
|
||||
test-unicode-ranges \
|
||||
test-bimap \
|
||||
$(NULL)
|
||||
test_algs_SOURCES = test-algs.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_algs_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_algs_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_iter_SOURCES = test-iter.cc hb-static.cc
|
||||
test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_iter_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_meta_SOURCES = test-meta.cc hb-static.cc
|
||||
test_meta_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_meta_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_number_SOURCES = test-number.cc hb-number.cc
|
||||
test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_number_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_ot_tag_SOURCES = hb-ot-tag.cc
|
||||
test_ot_tag_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_ot_tag_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_unicode_ranges_SOURCES = test-unicode-ranges.cc
|
||||
test_unicode_ranges_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_unicode_ranges_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
test_bimap_SOURCES = test-bimap.cc hb-static.cc
|
||||
test_bimap_SOURCES = test-bimap.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_bimap_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_bimap_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_iter_SOURCES = test-iter.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_iter_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_iter_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_meta_SOURCES = test-meta.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_meta_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_meta_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_number_SOURCES = test-number.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_number_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_number_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_ot_tag_SOURCES = hb-ot-tag.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_ot_tag_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_ot_tag_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_simd_SOURCES = test-simd.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_simd_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_simd_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
test_unicode_ranges_SOURCES = test-unicode-ranges.cc $(COMPILED_TESTS_SOURCES)
|
||||
test_unicode_ranges_CPPFLAGS = $(COMPILED_TESTS_CPPFLAGS)
|
||||
test_unicode_ranges_LDADD = $(COMPILED_TESTS_LDADD)
|
||||
|
||||
TESTS_ENVIRONMENT = \
|
||||
srcdir="$(srcdir)" \
|
||||
|
|
|
@ -346,19 +346,17 @@ struct hb_sorted_array_t :
|
|||
_hb_cmp_method<T, Type>);
|
||||
}
|
||||
#ifndef HB_NO_SIMD
|
||||
#if 0
|
||||
/* SIMD-backed bsearch_impl overload.
 *
 * hb_enable_if restricts this overload to the case where the decayed element
 * type is OT::RangeRecord.  It forwards the lookup of `x` to the SIMD glyph-id
 * range search, handing it the array base (arrayZ), the element count
 * (length) and sizeof (Type) as the record stride; the search reports its
 * position through `pos` and its boolean result is returned unchanged.
 *
 * The enclosing `#if 0` currently keeps this overload out of the build.
 *
 * NOTE(review): two `return` lines (the bsearch and the ksearch call) appear
 * back to back below; presumably one is the line removed by this change and
 * the other the line added -- confirm which one is current. */
template <typename U = Type,
|
||||
hb_enable_if (hb_is_same (hb_decay<U>, OT::RangeRecord))>
|
||||
bool bsearch_impl (hb_codepoint_t x, unsigned *pos, hb_priority<1>) const
|
||||
{
|
||||
return hb_simd_bsearch_glyphid_range (pos,
|
||||
return hb_simd_ksearch_glyphid_range (pos,
|
||||
x,
|
||||
this->arrayZ,
|
||||
this->length,
|
||||
sizeof (Type));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
template <typename T> inline hb_sorted_array_t<T>
|
||||
hb_sorted_array (T *array, unsigned int length)
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
*/
|
||||
|
||||
#include "hb.hh"
|
||||
#include "hb-number.hh"
|
||||
#include "hb-machinery.hh"
|
||||
#include "hb-number.hh"
|
||||
#include "hb-number-parser.hh"
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
|
||||
#include "hb.hh"
|
||||
#include "hb-meta.hh"
|
||||
#include "hb-algs.hh"
|
||||
|
||||
/*
|
||||
* = MOTIVATION
|
||||
|
@ -173,6 +174,12 @@
|
|||
* might work just fine. Especially since the second one will be fulfilled
|
||||
* straight from the L1 cachelines.
|
||||
*
|
||||
* Another snag I hit is that AVX2 only has signed comparisons, not unsigned.
|
||||
* So we add a shift to convert uint16_t numbers to int16_t before comparing.
|
||||
*
|
||||
* Also note that the order of arguments to _mm256_set_epi32() and family
|
||||
* is opposite of what I originally assumed. Docs are correct, just not
|
||||
* what you assume.
|
||||
*
|
||||
* PREFETCH
|
||||
*
|
||||
|
@ -194,33 +201,81 @@
|
|||
|
||||
/* TODO: Test -mvzeroupper. */
|
||||
|
||||
static __m256i x HB_UNUSED;
|
||||
static inline bool
|
||||
hb_simd_ksearch_glyphid_range (unsigned *pos, /* Out */
|
||||
hb_codepoint_t k,
|
||||
const void *base,
|
||||
size_t length,
|
||||
size_t stride)
|
||||
{
|
||||
if (unlikely (k & ~0xFFFF))
|
||||
{
|
||||
*pos = length;
|
||||
return false;
|
||||
}
|
||||
|
||||
*pos = 0;
|
||||
|
||||
#define HB_2TIMES(x) (x), (x)
|
||||
#define HB_4TIMES(x) HB_2TIMES(x), HB_2TIMES (x)
|
||||
#define HB_8TIMES(x) HB_4TIMES(x), HB_4TIMES (x)
|
||||
#define HB_16TIMES(x) HB_8TIMES (x), HB_8TIMES (x)
|
||||
|
||||
/* Find deptch of search tree. */
|
||||
static const unsigned steps[] = {1, 9, 81, 729, 6561, 59049};
|
||||
unsigned rank = 1;
|
||||
while (rank < ARRAY_LENGTH (steps) && length >= steps[rank])
|
||||
rank++;
|
||||
|
||||
static const __m256i _1x8 = _mm256_set_epi32 (HB_8TIMES (1));
|
||||
static const __m256i stridex8 = _mm256_set_epi32 (HB_8TIMES (stride));
|
||||
static const __m256i __1x8 = _mm256_set_epi32 (HB_8TIMES (-1));
|
||||
static const __m256i _12345678 = _mm256_set_epi32 (8, 7, 6, 5, 4, 3, 2, 1);
|
||||
static const __m256i __32768x16 = _mm256_set_epi16 (HB_16TIMES (-32768));
|
||||
|
||||
/* Set up key vector. */
|
||||
const __m256i K = _mm256_add_epi16 (_mm256_set_epi16 (HB_16TIMES ((signed) k - 32768)), _1x8);
|
||||
|
||||
while (rank)
|
||||
{
|
||||
unsigned step = steps[--rank];
|
||||
|
||||
/* Load multiple ranges to test against. */
|
||||
const unsigned limit = stride * length;
|
||||
const __m256i limits = _mm256_set_epi32 (HB_8TIMES (limit));
|
||||
const unsigned pitch = stride * step;
|
||||
const __m256i pitches = _mm256_set_epi32 (HB_8TIMES (pitch));
|
||||
const __m256i offsets = _mm256_sub_epi32 (_mm256_mullo_epi32 (pitches, _12345678), stridex8);
|
||||
const __m256i mask = _mm256_cmpgt_epi32 (limits, offsets);
|
||||
|
||||
/* The actual load... */
|
||||
__m256i V = _mm256_mask_i32gather_epi32 (__1x8, (const int *) base, offsets, mask, 1);
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
V = _mm256_add_epi16 (_mm256_slli_epi16 (V, 8),
|
||||
_mm256_srli_epi16 (V, 8));
|
||||
#endif
|
||||
V = _mm256_add_epi16 (V, __32768x16);
|
||||
|
||||
/* Compare and locate. */
|
||||
unsigned answer = hb_ctz (~_mm256_movemask_epi8 (_mm256_cmpgt_epi16 (K, V))) >> 1;
|
||||
bool found = answer & 1;
|
||||
answer = (answer + 1) >> 1;
|
||||
unsigned move = step * answer;
|
||||
*pos += move;
|
||||
if (found)
|
||||
{
|
||||
*pos -= 1;
|
||||
return true;
|
||||
}
|
||||
length -= move;
|
||||
base = (const void *) ((const char *) base + stride * move);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
#elif !defined(HB_NO_SIMD)
|
||||
#define HB_NO_SIMD
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Use to implement faster specializations.
|
||||
*/
|
||||
|
||||
#ifndef HB_NO_SIMD
|
||||
|
||||
#if 0
|
||||
/* Placeholder for a binary-search counterpart with the same parameter list
 * as hb_simd_ksearch_glyphid_range().  The body is empty (there is no return
 * statement), and the enclosing `#if 0` keeps it from being compiled. */
static inline bool
|
||||
hb_simd_bsearch_glyphid_range (unsigned *pos, /* Out */
|
||||
hb_codepoint_t k,
|
||||
const void *base,
|
||||
size_t length,
|
||||
size_t stride)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* HB_SIMD_HH */
|
||||
|
|
|
@ -604,11 +604,11 @@ struct BEInt<Type, 4>
|
|||
#include "hb-meta.hh"
|
||||
#include "hb-mutex.hh"
|
||||
#include "hb-number.hh"
|
||||
#include "hb-simd.hh" // Requires: hb-meta
|
||||
#include "hb-atomic.hh" // Requires: hb-meta
|
||||
#include "hb-null.hh" // Requires: hb-meta
|
||||
#include "hb-algs.hh" // Requires: hb-meta hb-null hb-number
|
||||
#include "hb-iter.hh" // Requires: hb-algs hb-meta
|
||||
#include "hb-simd.hh" // Requires: hb-algs hb-meta
|
||||
#include "hb-debug.hh" // Requires: hb-algs hb-atomic
|
||||
#include "hb-array.hh" // Requires: hb-algs hb-iter hb-null hb-simd
|
||||
#include "hb-vector.hh" // Requires: hb-array hb-null
|
||||
|
|
|
@ -24,8 +24,7 @@
|
|||
*/
|
||||
|
||||
#include "hb.hh"
|
||||
#include "hb-number.hh"
|
||||
#include "hb-number-parser.hh"
|
||||
#include "hb-number.cc"
|
||||
|
||||
|
||||
int
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright © 2019 Facebook, Inc.
|
||||
*
|
||||
* This is part of HarfBuzz, a text shaping library.
|
||||
*
|
||||
* Permission is hereby granted, without written agreement and without
|
||||
* license or royalty fees, to use, copy, modify, and distribute this
|
||||
* software and its documentation for any purpose, provided that the
|
||||
* above copyright notice and the following two paragraphs appear in
|
||||
* all copies of this software.
|
||||
*
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
|
||||
* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
|
||||
* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
|
||||
* DAMAGE.
|
||||
*
|
||||
* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
|
||||
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
|
||||
* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
*
|
||||
* Facebook Author(s): Behdad Esfahbod
|
||||
*/
|
||||
|
||||
#include "hb.hh"
|
||||
#include "hb-simd.hh"
|
||||
|
||||
|
||||
|
||||
/* A 16-bit unsigned value kept as two raw bytes, most significant byte
 * first. */
struct U16
{
  /* Implicit on purpose so that plain integers can initialize U16 arrays.
   * Only the low 16 bits of `value` are stored. */
  U16 (unsigned value)
  {
    const uint8_t high_byte = (uint8_t) ((value >> 8) & 0xFF);
    const uint8_t low_byte = (uint8_t) (value & 0xFF);

    v[0] = high_byte;
    v[1] = low_byte;
  }

  uint8_t v[2];
};
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
|
||||
const U16 a[] = {1, 2, 5, 10, 16, 19};
|
||||
|
||||
#define TEST(k, f, p) \
|
||||
{ \
|
||||
unsigned pos = 123456789; \
|
||||
bool found = hb_simd_ksearch_glyphid_range (&pos, \
|
||||
k, \
|
||||
a, \
|
||||
ARRAY_LENGTH (a) / 2, \
|
||||
sizeof (a[0]) * 2); \
|
||||
/*printf ("key %d found %d pos %d\n", k, found, pos);*/ \
|
||||
assert (found == f && pos == p); \
|
||||
}
|
||||
|
||||
TEST (0, false, 0);
|
||||
TEST (1, true , 0);
|
||||
TEST (2, true , 0);
|
||||
TEST (3, false, 1);
|
||||
TEST (4, false, 1);
|
||||
TEST (5, true , 1);
|
||||
TEST (6, true , 1);
|
||||
TEST (7, true , 1);
|
||||
TEST (8, true , 1);
|
||||
TEST (9, true , 1);
|
||||
TEST (10, true , 1);
|
||||
TEST (11, false, 2);
|
||||
TEST (12, false, 2);
|
||||
TEST (13, false, 2);
|
||||
TEST (14, false, 2);
|
||||
TEST (15, false, 2);
|
||||
TEST (16, true , 2);
|
||||
TEST (17, true , 2);
|
||||
TEST (18, true , 2);
|
||||
TEST (19, true , 2);
|
||||
TEST (20, false, 3);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue