ARM64 first character fixes by Sebastian Pop.
This commit is contained in:
parent
9323fa32b2
commit
6f41a5a01a
|
@ -112,14 +112,14 @@ compare_type compare2_type = compare_match1;
|
|||
vect_t cmp1a, cmp1b, cmp2a, cmp2b;
|
||||
const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
|
||||
PCRE2_UCHAR char1a = ic.c.c1;
|
||||
PCRE2_UCHAR char1b = ic.c.c2;
|
||||
PCRE2_UCHAR char2a = ic.c.c3;
|
||||
PCRE2_UCHAR char2b = ic.c.c4;
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
cmp1a = VDUPQ(char1a);
|
||||
cmp2a = VDUPQ(char2a);
|
||||
# else
|
||||
PCRE2_UCHAR char1b = ic.c.c2;
|
||||
PCRE2_UCHAR char2b = ic.c.c4;
|
||||
if (char1a == char1b)
|
||||
cmp1a = VDUPQ(char1a);
|
||||
else
|
||||
|
@ -159,16 +159,26 @@ else
|
|||
}
|
||||
# endif
|
||||
|
||||
str_ptr += offs1;
|
||||
str_ptr += IN_UCHARS(offs1);
|
||||
#endif
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
vect_t char_mask = VDUPQ(0xff);
|
||||
#endif
|
||||
|
||||
#if defined(FF_UTF)
|
||||
restart:;
|
||||
#endif
|
||||
|
||||
#if defined(FFCPS)
|
||||
sljit_u8 *p1 = str_ptr - diff;
|
||||
#endif
|
||||
sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
|
||||
str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
|
||||
vect_t data = VLD1Q(str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data = VANDQ(data, char_mask);
|
||||
#endif
|
||||
|
||||
#if defined(FFCS)
|
||||
vect_t eq = VCEQQ(data, vc1);
|
||||
|
@ -186,7 +196,17 @@ eq = VCEQQ(eq, vc1);
|
|||
# if defined(FFCPS_DIFF1)
|
||||
vect_t prev_data = data;
|
||||
# endif
|
||||
vect_t data2 = VLD1Q(str_ptr - diff);
|
||||
|
||||
vect_t data2;
|
||||
if (p1 < str_ptr)
|
||||
{
|
||||
data2 = VLD1Q(str_ptr - diff);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
data2 = shift_left_n_lanes(data, offs1 - offs2);
|
||||
|
||||
data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
|
||||
data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
|
||||
|
@ -223,6 +243,9 @@ str_ptr += 16;
|
|||
while (str_ptr < str_end)
|
||||
{
|
||||
vect_t orig_data = VLD1Q(str_ptr);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
orig_data = VANDQ(orig_data, char_mask);
|
||||
#endif
|
||||
data = orig_data;
|
||||
|
||||
#if defined(FFCS)
|
||||
|
@ -240,9 +263,12 @@ while (str_ptr < str_end)
|
|||
|
||||
#if defined(FFCPS)
|
||||
# if defined (FFCPS_DIFF1)
|
||||
data2 = VEXTQ(prev_data, data, 15);
|
||||
data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
|
||||
# else
|
||||
data2 = VLD1Q(str_ptr - diff);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
data2 = VANDQ(data2, char_mask);
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef FFCPS_CHAR1A2A
|
||||
|
|
|
@ -655,8 +655,9 @@ return (*s & 0xfc00) == 0xdc00;
|
|||
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
# define VECTOR_FACTOR 16
|
||||
# define vect_t uint8x16_t
|
||||
# define VLD1Q vld1q_u8
|
||||
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
|
||||
# define VCEQQ vceqq_u8
|
||||
# define VORRQ vorrq_u8
|
||||
# define VST1Q vst1q_u8
|
||||
|
@ -668,8 +669,9 @@ typedef union {
|
|||
uint64_t dw[2];
|
||||
} quad_word;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
# define VECTOR_FACTOR 8
|
||||
# define vect_t uint16x8_t
|
||||
# define VLD1Q vld1q_u16
|
||||
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
|
||||
# define VCEQQ vceqq_u16
|
||||
# define VORRQ vorrq_u16
|
||||
# define VST1Q vst1q_u16
|
||||
|
@ -681,8 +683,9 @@ typedef union {
|
|||
uint64_t dw[2];
|
||||
} quad_word;
|
||||
#else
|
||||
# define VECTOR_FACTOR 4
|
||||
# define vect_t uint32x4_t
|
||||
# define VLD1Q vld1q_u32
|
||||
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
|
||||
# define VCEQQ vceqq_u32
|
||||
# define VORRQ vorrq_u32
|
||||
# define VST1Q vst1q_u32
|
||||
|
@ -697,23 +700,29 @@ typedef union {
|
|||
|
||||
#define FFCS
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
#undef FFCS
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS
|
||||
|
||||
#define FFCS_2
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_2
|
||||
|
||||
#define FFCS_MASK
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCS_MASK
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
|
||||
|
@ -842,33 +851,62 @@ return 3;
|
|||
#endif
|
||||
}
|
||||
|
||||
/* ARM doesn't have a shift left across lanes. */
|
||||
static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
|
||||
{
|
||||
vect_t zero = VDUPQ(0);
|
||||
SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
|
||||
/* VEXTQ takes an immediate as last argument. */
|
||||
#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
|
||||
switch (n)
|
||||
{
|
||||
C(1); C(2); C(3);
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 32
|
||||
C(4); C(5); C(6); C(7);
|
||||
# if PCRE2_CODE_UNIT_WIDTH != 16
|
||||
C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
|
||||
# endif
|
||||
#endif
|
||||
default:
|
||||
/* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
|
||||
happen. The return is still here for compilers to not warn. */
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
#define FFCPS
|
||||
#define FFCPS_DIFF1
|
||||
#define FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_0
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_0
|
||||
|
||||
#undef FFCPS_CHAR1A2A
|
||||
|
||||
#define FFCPS_1
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS_1
|
||||
|
||||
#undef FFCPS_DIFF1
|
||||
|
||||
#define FFCPS_DEFAULT
|
||||
#include "pcre2_jit_neon_inc.h"
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
# define FF_UTF
|
||||
# include "pcre2_jit_neon_inc.h"
|
||||
# undef FF_UTF
|
||||
#endif
|
||||
#undef FFCPS
|
||||
|
||||
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
|
||||
|
|
Loading…
Reference in New Issue