Implement simd support for requested character in JIT.
This commit is contained in:
parent
4a7dfab0ec
commit
a3057bbecd
|
@ -6107,32 +6107,28 @@ if (common->match_end_ptr != 0)
|
||||||
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
|
OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
|
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
|
||||||
{
|
{
|
||||||
DEFINE_COMPILER;
|
DEFINE_COMPILER;
|
||||||
struct sljit_label *loop;
|
struct sljit_label *loop;
|
||||||
struct sljit_jump *toolong;
|
struct sljit_jump *toolong;
|
||||||
struct sljit_jump *alreadyfound;
|
struct sljit_jump *already_found;
|
||||||
struct sljit_jump *found;
|
struct sljit_jump *found;
|
||||||
struct sljit_jump *foundoc = NULL;
|
struct sljit_jump *found_oc = NULL;
|
||||||
struct sljit_jump *notfound;
|
jump_list *not_found = NULL;
|
||||||
sljit_u32 oc, bit;
|
sljit_u32 oc, bit;
|
||||||
|
|
||||||
SLJIT_ASSERT(common->req_char_ptr != 0);
|
SLJIT_ASSERT(common->req_char_ptr != 0);
|
||||||
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
|
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
|
||||||
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX);
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
|
||||||
toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
|
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
|
||||||
alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
|
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
|
||||||
|
|
||||||
if (has_firstchar)
|
if (has_firstchar)
|
||||||
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
else
|
else
|
||||||
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
|
OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
|
||||||
|
|
||||||
loop = LABEL();
|
|
||||||
notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
|
|
||||||
|
|
||||||
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
|
|
||||||
oc = req_char;
|
oc = req_char;
|
||||||
if (caseless)
|
if (caseless)
|
||||||
{
|
{
|
||||||
|
@ -6142,9 +6138,23 @@ if (caseless)
|
||||||
oc = UCD_OTHERCASE(req_char);
|
oc = UCD_OTHERCASE(req_char);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
if (req_char == oc)
|
|
||||||
found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
|
#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
|
||||||
|
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
|
||||||
|
{
|
||||||
|
not_found = fast_requested_char_simd(common, req_char, oc);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
loop = LABEL();
|
||||||
|
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
|
||||||
|
|
||||||
|
if (req_char == oc)
|
||||||
|
found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
|
||||||
|
else
|
||||||
{
|
{
|
||||||
bit = req_char ^ oc;
|
bit = req_char ^ oc;
|
||||||
if (is_powerof2(bit))
|
if (is_powerof2(bit))
|
||||||
|
@ -6155,19 +6165,22 @@ else
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
|
found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
|
||||||
foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
|
found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
JUMPTO(SLJIT_JUMP, loop);
|
JUMPTO(SLJIT_JUMP, loop);
|
||||||
|
|
||||||
|
JUMPHERE(found);
|
||||||
|
if (found_oc)
|
||||||
|
JUMPHERE(found_oc);
|
||||||
|
}
|
||||||
|
|
||||||
JUMPHERE(found);
|
|
||||||
if (foundoc)
|
|
||||||
JUMPHERE(foundoc);
|
|
||||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
|
||||||
JUMPHERE(alreadyfound);
|
|
||||||
|
JUMPHERE(already_found);
|
||||||
JUMPHERE(toolong);
|
JUMPHERE(toolong);
|
||||||
return notfound;
|
return not_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_revertframes(compiler_common *common)
|
static void do_revertframes(compiler_common *common)
|
||||||
|
@ -13135,9 +13148,9 @@ struct sljit_label *reset_match_label;
|
||||||
struct sljit_label *quit_label;
|
struct sljit_label *quit_label;
|
||||||
struct sljit_jump *jump;
|
struct sljit_jump *jump;
|
||||||
struct sljit_jump *minlength_check_failed = NULL;
|
struct sljit_jump *minlength_check_failed = NULL;
|
||||||
struct sljit_jump *reqbyte_notfound = NULL;
|
|
||||||
struct sljit_jump *empty_match = NULL;
|
struct sljit_jump *empty_match = NULL;
|
||||||
struct sljit_jump *end_anchor_failed = NULL;
|
struct sljit_jump *end_anchor_failed = NULL;
|
||||||
|
jump_list *reqcu_not_found = NULL;
|
||||||
|
|
||||||
SLJIT_ASSERT(tables);
|
SLJIT_ASSERT(tables);
|
||||||
|
|
||||||
|
@ -13403,7 +13416,7 @@ if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PC
|
||||||
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
|
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
|
||||||
}
|
}
|
||||||
if (common->req_char_ptr != 0)
|
if (common->req_char_ptr != 0)
|
||||||
reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
|
reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
|
||||||
|
|
||||||
/* Store the current STR_PTR in OVECTOR(0). */
|
/* Store the current STR_PTR in OVECTOR(0). */
|
||||||
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
|
||||||
|
@ -13538,8 +13551,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* No more remaining characters. */
|
/* No more remaining characters. */
|
||||||
if (reqbyte_notfound != NULL)
|
if (reqcu_not_found != NULL)
|
||||||
JUMPHERE(reqbyte_notfound);
|
set_jumps(reqcu_not_found, LABEL());
|
||||||
|
|
||||||
if (mode == PCRE2_JIT_PARTIAL_SOFT)
|
if (mode == PCRE2_JIT_PARTIAL_SOFT)
|
||||||
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
|
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
|
||||||
|
|
|
@ -344,6 +344,136 @@ if (common->utf && offset > 0)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
|
||||||
|
|
||||||
|
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
|
||||||
|
{
|
||||||
|
DEFINE_COMPILER;
|
||||||
|
struct sljit_label *start;
|
||||||
|
struct sljit_jump *quit;
|
||||||
|
jump_list *not_found = NULL;
|
||||||
|
sse2_compare_type compare_type = sse2_compare_match1;
|
||||||
|
sljit_u8 instruction[8];
|
||||||
|
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
|
||||||
|
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
|
||||||
|
sljit_s32 data_ind = 0;
|
||||||
|
sljit_s32 tmp_ind = 1;
|
||||||
|
sljit_s32 cmp1_ind = 2;
|
||||||
|
sljit_s32 cmp2_ind = 3;
|
||||||
|
sljit_u32 bit = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
bit = char1 ^ char2;
|
||||||
|
compare_type = sse2_compare_match1i;
|
||||||
|
|
||||||
|
if (!is_powerof2(bit))
|
||||||
|
{
|
||||||
|
bit = 0;
|
||||||
|
compare_type = sse2_compare_match2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
|
||||||
|
OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
|
||||||
|
|
||||||
|
/* First part (unaligned start) */
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
|
||||||
|
|
||||||
|
SLJIT_ASSERT(tmp1_reg_ind < 8);
|
||||||
|
|
||||||
|
/* MOVD xmm, r/m32 */
|
||||||
|
instruction[0] = 0x66;
|
||||||
|
instruction[1] = 0x0f;
|
||||||
|
instruction[2] = 0x6e;
|
||||||
|
instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 4);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
|
||||||
|
|
||||||
|
/* MOVD xmm, r/m32 */
|
||||||
|
instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* PSHUFD xmm1, xmm2/m128, imm8 */
|
||||||
|
/* instruction[0] = 0x66; */
|
||||||
|
/* instruction[1] = 0x0f; */
|
||||||
|
instruction[2] = 0x70;
|
||||||
|
instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
|
||||||
|
instruction[4] = 0;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 5);
|
||||||
|
|
||||||
|
if (char1 != char2)
|
||||||
|
{
|
||||||
|
/* PSHUFD xmm1, xmm2/m128, imm8 */
|
||||||
|
instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
|
||||||
|
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
|
||||||
|
|
||||||
|
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* PMOVMSKB reg, xmm */
|
||||||
|
/* instruction[0] = 0x66; */
|
||||||
|
/* instruction[1] = 0x0f; */
|
||||||
|
instruction[2] = 0xd7;
|
||||||
|
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 4);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
|
||||||
|
|
||||||
|
quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
|
||||||
|
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
|
||||||
|
|
||||||
|
/* Second part (aligned) */
|
||||||
|
start = LABEL();
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
|
||||||
|
|
||||||
|
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
|
|
||||||
|
load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
|
||||||
|
for (i = 0; i < 4; i++)
|
||||||
|
fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
|
||||||
|
|
||||||
|
/* PMOVMSKB reg, xmm */
|
||||||
|
/* instruction[0] = 0x66; */
|
||||||
|
/* instruction[1] = 0x0f; */
|
||||||
|
instruction[2] = 0xd7;
|
||||||
|
instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 4);
|
||||||
|
|
||||||
|
CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
|
||||||
|
|
||||||
|
JUMPHERE(quit);
|
||||||
|
|
||||||
|
/* BSF r32, r/m32 */
|
||||||
|
instruction[0] = 0x0f;
|
||||||
|
instruction[1] = 0xbc;
|
||||||
|
instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
|
||||||
|
sljit_emit_op_custom(compiler, instruction, 3);
|
||||||
|
|
||||||
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0);
|
||||||
|
add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
|
||||||
|
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
|
||||||
|
return not_found;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef _WIN64
|
#ifndef _WIN64
|
||||||
|
|
||||||
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
|
static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
|
||||||
|
|
Loading…
Reference in New Issue