diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 21b5a40..3d2403a 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -4146,12 +4146,20 @@ if (firstline) { SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); - OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); - quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0); - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); - JUMPHERE(quit); + OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1)); +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (sljit_x86_is_cmov_available()) + { + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0); + } +#endif + { + quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + JUMPHERE(quit); + } } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 @@ -4163,49 +4171,55 @@ if (common->utf && offset > 0) /* SSE2 accelerated first character search. */ -if (sljit_is_fpu_available()) +if (sljit_x86_is_sse2_available()) { fast_forward_first_char2_sse2(common, char1, char2); - quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); - if (firstline) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); - else - OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); - - if (offset > 0) - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0); + if (common->mode == PCRE2_JIT_COMPLETE) + { + /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */ + SLJIT_ASSERT(common->forced_quit_label == NULL); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH); + add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf && offset > 0) - { - utf_quit = JUMP(SLJIT_JUMP); + if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); - JUMPHERE(quit); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #if PCRE2_CODE_UNIT_WIDTH == 8 - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); - CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); #elif PCRE2_CODE_UNIT_WIDTH == 16 - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); #else #error "Unknown code width" #endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - JUMPHERE(utf_quit); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + + if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + } + else if (sljit_x86_is_cmov_available()) + { + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0); } else -#endif + { + quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0); JUMPHERE(quit); - - if (offset > 0) - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + } if (firstline) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); - return; } diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index f0969da..2e2e9ac 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -869,34 +869,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w); -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index ( >=0 ) of any SLJIT_R, - SLJIT_S and SLJIT_SP registers. - - Note: it returns with -1 for virtual registers (only on x86-32). */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); - -/* The following function is a helper function for sljit_emit_op_custom. - It returns with the real machine register index of any SLJIT_FLOAT register. - - Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); - -/* Any instruction can be inserted into the instruction stream by - sljit_emit_op_custom. It has a similar purpose as inline assembly. - The size parameter must match to the instruction size of the target - architecture: - - x86: 0 < size <= 15. The instruction argument can be byte aligned. - Thumb2: if size == 2, the instruction argument must be 2 byte aligned. - if size == 4, the instruction argument must be 4 byte aligned. - Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_si size); - /* Returns with non-zero if fpu is available. */ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void); @@ -1214,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ +/* --------------------------------------------------------------------- */ +/* CPU specific functions */ +/* --------------------------------------------------------------------- */ + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index ( >=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_FLOAT register. + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size); + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* Returns with non-zero if sse2 is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void); + +/* Returns with non-zero if cmov instruction is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void); + +/* Emit a conditional mov instruction on x86 CPUs. This instruction + moves src to destination, if the condition is satisfied. Unlike + other arithmetic instructions, destination must be a register. + Before such instructions are emitted, cmov support should be + checked by sljit_x86_is_cmov_available function. + type must be between SLJIT_EQUAL and SLJIT_S_ORDERED + dst_reg must be a valid register and it can be combined + with SLJIT_INT_OP to perform 32 bit arithmetic + Flags: I - (never set any flags) + */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_si type, + sljit_si dst_reg, + sljit_si src, sljit_sw srcw); + +#endif + #endif /* _SLJIT_LIR_H_ */ diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index e148c34..416c15a 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -2936,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta { *(sljit_sw*)addr = new_constant; } + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void) +{ +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else + return 1; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void) +{ + if (cpu_has_cmov == -1) + get_cpu_features(); + return cpu_has_cmov; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler, + sljit_si type, + sljit_si dst_reg, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + + CHECK_ERROR(); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_x86_is_cmov_available()); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP)); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " x86_cmov%s %s%s, ", + !(dst_reg & SLJIT_INT_OP) ? "" : ".i", + JUMP_PREFIX(type), jump_names[type & 0xff]); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = dst_reg & SLJIT_INT_OP; +#endif + dst_reg &= ~SLJIT_INT_OP; + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = get_jump_code(type & 0xff) - 0x40; + return SLJIT_SUCCESS; +}