Mixing SSE2 instructions in JIT.

This commit is contained in:
Zoltán Herczeg 2019-06-25 09:29:37 +00:00
parent 7768756737
commit 7f24a98cfb
1 changed files with 55 additions and 22 deletions

View File

@ -5555,57 +5555,80 @@ typedef enum {
sse2_compare_match2, sse2_compare_match2,
} sse2_compare_type; } sse2_compare_type;
static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
sse2_compare_type compare_type, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
{ {
sljit_u8 instruction[4]; sljit_u8 instruction[4];
instruction[0] = 0x66; instruction[0] = 0x66;
instruction[1] = 0x0f; instruction[1] = 0x0f;
SLJIT_ASSERT(step >= 0 && step <= 3);
if (compare_type != sse2_compare_match2) if (compare_type != sse2_compare_match2)
{ {
if (compare_type == sse2_compare_match1i) if (step == 0)
{ {
/* POR xmm1, xmm2/m128 */ if (compare_type == sse2_compare_match1i)
/* instruction[0] = 0x66; */ {
/* instruction[1] = 0x0f; */ /* POR xmm1, xmm2/m128 */
instruction[2] = 0xeb; /* instruction[0] = 0x66; */
instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; /* instruction[1] = 0x0f; */
sljit_emit_op_custom(compiler, instruction, 4); instruction[2] = 0xeb;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 4);
}
return;
} }
if (step != 2)
return;
/* PCMPEQB/W/D xmm1, xmm2/m128 */ /* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */ /* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4); sljit_emit_op_custom(compiler, instruction, 4);
return;
} }
else
switch (step)
{ {
case 0:
/* MOVDQA xmm1, xmm2/m128 */ /* MOVDQA xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */ /* instruction[1] = 0x0f; */
instruction[2] = 0x6f; instruction[2] = 0x6f;
instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
sljit_emit_op_custom(compiler, instruction, 4); sljit_emit_op_custom(compiler, instruction, 4);
return;
case 1:
/* PCMPEQB/W/D xmm1, xmm2/m128 */ /* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */ /* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
sljit_emit_op_custom(compiler, instruction, 4); sljit_emit_op_custom(compiler, instruction, 4);
return;
case 2:
/* PCMPEQB/W/D xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
sljit_emit_op_custom(compiler, instruction, 4); sljit_emit_op_custom(compiler, instruction, 4);
return;
case 3:
/* POR xmm1, xmm2/m128 */ /* POR xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
/* instruction[1] = 0x0f; */ /* instruction[1] = 0x0f; */
instruction[2] = 0xeb; instruction[2] = 0xeb;
instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
sljit_emit_op_custom(compiler, instruction, 4); sljit_emit_op_custom(compiler, instruction, 4);
return;
} }
} }
@ -5627,6 +5650,7 @@ sljit_s32 tmp_ind = 1;
sljit_s32 cmp1_ind = 2; sljit_s32 cmp1_ind = 2;
sljit_s32 cmp2_ind = 3; sljit_s32 cmp2_ind = 3;
sljit_u32 bit = 0; sljit_u32 bit = 0;
int i;
SLJIT_UNUSED_ARG(offset); SLJIT_UNUSED_ARG(offset);
@ -5692,7 +5716,8 @@ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
fast_forward_char_pair_sse2_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */ /* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
@ -5718,7 +5743,8 @@ if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]); add_jump(compiler, &common->failed_match, partial_quit[1]);
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
fast_forward_char_pair_sse2_compare(compiler, compare_type, data_ind, cmp1_ind, cmp2_ind, tmp_ind); for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
/* PMOVMSKB reg, xmm */ /* PMOVMSKB reg, xmm */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
@ -5797,18 +5823,19 @@ sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data1_ind = 0; sljit_s32 data1_ind = 0;
sljit_s32 data2_ind = 1; sljit_s32 data2_ind = 1;
sljit_s32 tmp_ind = 2; sljit_s32 tmp1_ind = 2;
sljit_s32 cmp1a_ind = 3; sljit_s32 tmp2_ind = 3;
sljit_s32 cmp1b_ind = 4; sljit_s32 cmp1a_ind = 4;
sljit_s32 cmp2a_ind = 5; sljit_s32 cmp1b_ind = 5;
sljit_s32 cmp2b_ind = 6; sljit_s32 cmp2a_ind = 6;
sljit_s32 cmp2b_ind = 7;
struct sljit_label *start; struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart; struct sljit_label *restart;
#endif #endif
struct sljit_jump *jump[2]; struct sljit_jump *jump[2];
sljit_u8 instruction[8]; sljit_u8 instruction[8];
int i;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset())); SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset()));
@ -5951,8 +5978,11 @@ JUMPHERE(jump[1]);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
fast_forward_char_pair_sse2_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); {
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
}
/* PAND xmm1, xmm2/m128 */ /* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */
@ -5985,8 +6015,11 @@ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, S
load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
fast_forward_char_pair_sse2_compare(compiler, compare1_type, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare2_type, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); {
fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
}
/* PAND xmm1, xmm2/m128 */ /* PAND xmm1, xmm2/m128 */
/* instruction[0] = 0x66; */ /* instruction[0] = 0x66; */