From a3057bbecd4192d729ec38e4bd7b387ff47984f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= Date: Mon, 24 Feb 2020 05:26:15 +0000 Subject: [PATCH] Implement simd support for requested character in JIT. --- src/pcre2_jit_compile.c | 83 ++++++++++++++----------- src/pcre2_jit_simd_inc.h | 130 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 35 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index b42b335..8aa57dd 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -6107,32 +6107,28 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); } -static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) +static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) { DEFINE_COMPILER; struct sljit_label *loop; struct sljit_jump *toolong; -struct sljit_jump *alreadyfound; +struct sljit_jump *already_found; struct sljit_jump *found; -struct sljit_jump *foundoc = NULL; -struct sljit_jump *notfound; +struct sljit_jump *found_oc = NULL; +jump_list *not_found = NULL; sljit_u32 oc, bit; SLJIT_ASSERT(common->req_char_ptr != 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); -OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX); -toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0); -alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); +toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0); +already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0); if (has_firstchar) OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); else OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); -loop = LABEL(); -notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0); - -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); oc = req_char; if (caseless) { @@ -6142,32 +6138,49 @@ if (caseless) oc = UCD_OTHERCASE(req_char); #endif } -if (req_char == oc) - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); -else + +#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD +if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD) { - bit = req_char ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); - } + not_found = fast_requested_char_simd(common, req_char, oc); + } +else +#endif + { + loop = LABEL(); + add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); + + if (req_char == oc) + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); else { - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); - foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + bit = req_char ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); + } + else + { + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + } } - } -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, loop); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_JUMP, loop); + + JUMPHERE(found); + if (found_oc) + JUMPHERE(found_oc); + } -JUMPHERE(found); -if (foundoc) - JUMPHERE(foundoc); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); -JUMPHERE(alreadyfound); + +JUMPHERE(already_found); JUMPHERE(toolong); -return notfound; +return not_found; } static void do_revertframes(compiler_common *common) @@ -13135,9 +13148,9 @@ struct sljit_label *reset_match_label; struct sljit_label *quit_label; struct sljit_jump *jump; struct sljit_jump *minlength_check_failed = NULL; -struct sljit_jump *reqbyte_notfound = NULL; struct sljit_jump *empty_match = NULL; struct sljit_jump *end_anchor_failed = NULL; +jump_list *reqcu_not_found = NULL; SLJIT_ASSERT(tables); @@ -13403,7 +13416,7 @@ if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PC minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); } if (common->req_char_ptr != 0) - reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); + reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); /* Store the current STR_PTR in OVECTOR(0). */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); @@ -13538,8 +13551,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) } /* No more remaining characters. */ -if (reqbyte_notfound != NULL) - JUMPHERE(reqbyte_notfound); +if (reqcu_not_found != NULL) + set_jumps(reqcu_not_found, LABEL()); if (mode == PCRE2_JIT_PARTIAL_SOFT) CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h index f7d56b2..5673d33 100644 --- a/src/pcre2_jit_simd_inc.h +++ b/src/pcre2_jit_simd_inc.h @@ -344,6 +344,136 @@ if (common->utf && offset > 0) #endif } +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +sse2_compare_type compare_type = sse2_compare_match1; +sljit_u8 instruction[8]; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = sse2_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = sse2_compare_match2; + } + } + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); +OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); +OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + +SLJIT_ASSERT(tmp1_reg_ind < 8); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; +instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1 != char2) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + + /* MOVD xmm, r/m32 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0x70; +instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; +instruction[4] = 0; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1 != char2) + { + /* PSHUFD xmm1, xmm2/m128, imm8 */ + instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); + +quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); +for (i = 0; i < 4; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + +/* PMOVMSKB reg, xmm */ +/* instruction[0] = 0x66; */ +/* instruction[1] = 0x0f; */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); + +JUMPHERE(quit); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); +return not_found; +} + #ifndef _WIN64 static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)