[simd] Use faster byteswap, using shuffle

This commit is contained in:
Behdad Esfahbod 2019-12-08 22:31:46 -06:00
parent 343da74ec6
commit 6013ead1f5
1 changed file with 5 additions and 2 deletions

View File

@@ -251,8 +251,11 @@ hb_simd_ksearch_glyphid_range (unsigned *pos, /* Out */
/* The actual load... */ /* The actual load... */
__m256i V = _mm256_mask_i32gather_epi32 (__1x8, (const int *) base, offsets, mask, 1); __m256i V = _mm256_mask_i32gather_epi32 (__1x8, (const int *) base, offsets, mask, 1);
#if __BYTE_ORDER == __LITTLE_ENDIAN #if __BYTE_ORDER == __LITTLE_ENDIAN
V = _mm256_add_epi16 (_mm256_slli_epi16 (V, 8), static const __m256i bswap16_shuffle = _mm256_set_epi16 (0x0E0F,0x0C0D,0x0A0B,0x0809,
_mm256_srli_epi16 (V, 8)); 0x0607,0x0405,0x0203,0x0001,
0x0E0F,0x0C0D,0x0A0B,0x0809,
0x0607,0x0405,0x0203,0x0001);
V = _mm256_shuffle_epi8 (V, bswap16_shuffle);
#endif #endif
V = _mm256_add_epi16 (V, __32768x16); V = _mm256_add_epi16 (V, __32768x16);