SSE2 refactor, JIT compiler update.
This commit is contained in:
parent
170644eca3
commit
ccda7d218f
|
@ -4146,12 +4146,20 @@ if (firstline)
|
||||||
{
|
{
|
||||||
SLJIT_ASSERT(common->first_line_end != 0);
|
SLJIT_ASSERT(common->first_line_end != 0);
|
||||||
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
|
||||||
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
|
|
||||||
|
|
||||||
OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
|
OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
|
||||||
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
|
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
if (sljit_x86_is_cmov_available())
|
||||||
JUMPHERE(quit);
|
{
|
||||||
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
|
||||||
|
sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
|
||||||
|
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||||
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
|
@ -4163,49 +4171,55 @@ if (common->utf && offset > 0)
|
||||||
|
|
||||||
/* SSE2 accelerated first character search. */
|
/* SSE2 accelerated first character search. */
|
||||||
|
|
||||||
if (sljit_is_fpu_available())
|
if (sljit_x86_is_sse2_available())
|
||||||
{
|
{
|
||||||
fast_forward_first_char2_sse2(common, char1, char2);
|
fast_forward_first_char2_sse2(common, char1, char2);
|
||||||
|
|
||||||
quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
|
||||||
if (firstline)
|
if (common->mode == PCRE2_JIT_COMPLETE)
|
||||||
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
|
{
|
||||||
else
|
/* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
|
||||||
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
|
SLJIT_ASSERT(common->forced_quit_label == NULL);
|
||||||
|
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
|
||||||
if (offset > 0)
|
add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
|
|
||||||
|
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||||
if (common->utf && offset > 0)
|
if (common->utf && offset > 0)
|
||||||
{
|
{
|
||||||
utf_quit = JUMP(SLJIT_JUMP);
|
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
|
||||||
|
|
||||||
JUMPHERE(quit);
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
||||||
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
|
||||||
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
|
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
|
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
|
||||||
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
|
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
|
||||||
#else
|
#else
|
||||||
#error "Unknown code width"
|
#error "Unknown code width"
|
||||||
#endif
|
#endif
|
||||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
|
||||||
JUMPHERE(utf_quit);
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (offset > 0)
|
||||||
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
|
||||||
|
}
|
||||||
|
else if (sljit_x86_is_cmov_available())
|
||||||
|
{
|
||||||
|
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
|
||||||
|
sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
{
|
||||||
|
quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
|
||||||
|
OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
|
||||||
JUMPHERE(quit);
|
JUMPHERE(quit);
|
||||||
|
}
|
||||||
if (offset > 0)
|
|
||||||
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
|
|
||||||
|
|
||||||
if (firstline)
|
if (firstline)
|
||||||
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -869,34 +869,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
|
||||||
sljit_si src1, sljit_sw src1w,
|
sljit_si src1, sljit_sw src1w,
|
||||||
sljit_si src2, sljit_sw src2w);
|
sljit_si src2, sljit_sw src2w);
|
||||||
|
|
||||||
/* The following function is a helper function for sljit_emit_op_custom.
|
|
||||||
It returns with the real machine register index ( >=0 ) of any SLJIT_R,
|
|
||||||
SLJIT_S and SLJIT_SP registers.
|
|
||||||
|
|
||||||
Note: it returns with -1 for virtual registers (only on x86-32). */
|
|
||||||
|
|
||||||
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
|
|
||||||
|
|
||||||
/* The following function is a helper function for sljit_emit_op_custom.
|
|
||||||
It returns with the real machine register index of any SLJIT_FLOAT register.
|
|
||||||
|
|
||||||
Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
|
|
||||||
|
|
||||||
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
|
|
||||||
|
|
||||||
/* Any instruction can be inserted into the instruction stream by
|
|
||||||
sljit_emit_op_custom. It has a similar purpose as inline assembly.
|
|
||||||
The size parameter must match to the instruction size of the target
|
|
||||||
architecture:
|
|
||||||
|
|
||||||
x86: 0 < size <= 15. The instruction argument can be byte aligned.
|
|
||||||
Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
|
|
||||||
if size == 4, the instruction argument must be 4 byte aligned.
|
|
||||||
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
|
|
||||||
|
|
||||||
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
|
|
||||||
void *instruction, sljit_si size);
|
|
||||||
|
|
||||||
/* Returns with non-zero if fpu is available. */
|
/* Returns with non-zero if fpu is available. */
|
||||||
|
|
||||||
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
|
||||||
|
@ -1214,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct
|
||||||
|
|
||||||
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
|
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
/* CPU specific functions */
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* The following function is a helper function for sljit_emit_op_custom.
|
||||||
|
It returns with the real machine register index ( >=0 ) of any SLJIT_R,
|
||||||
|
SLJIT_S and SLJIT_SP registers.
|
||||||
|
|
||||||
|
Note: it returns with -1 for virtual registers (only on x86-32). */
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
|
||||||
|
|
||||||
|
/* The following function is a helper function for sljit_emit_op_custom.
|
||||||
|
It returns with the real machine register index of any SLJIT_FLOAT register.
|
||||||
|
|
||||||
|
Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
|
||||||
|
|
||||||
|
/* Any instruction can be inserted into the instruction stream by
|
||||||
|
sljit_emit_op_custom. It has a similar purpose as inline assembly.
|
||||||
|
The size parameter must match to the instruction size of the target
|
||||||
|
architecture:
|
||||||
|
|
||||||
|
x86: 0 < size <= 15. The instruction argument can be byte aligned.
|
||||||
|
Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
|
||||||
|
if size == 4, the instruction argument must be 4 byte aligned.
|
||||||
|
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
|
||||||
|
void *instruction, sljit_si size);
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||||
|
|
||||||
|
/* Returns with non-zero if sse2 is available. */
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
|
||||||
|
|
||||||
|
/* Returns with non-zero if cmov instruction is available. */
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
|
||||||
|
|
||||||
|
/* Emit a conditional mov instruction on x86 CPUs. This instruction
|
||||||
|
moves src to destination, if the condition is satisfied. Unlike
|
||||||
|
other arithmetic instructions, destination must be a register.
|
||||||
|
Before such instructions are emitted, cmov support should be
|
||||||
|
checked by sljit_x86_is_cmov_available function.
|
||||||
|
type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
|
||||||
|
dst_reg must be a valid register and it can be combined
|
||||||
|
with SLJIT_INT_OP to perform 32 bit arithmetic
|
||||||
|
Flags: I - (never set any flags)
|
||||||
|
*/
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
|
||||||
|
sljit_si type,
|
||||||
|
sljit_si dst_reg,
|
||||||
|
sljit_si src, sljit_sw srcw);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SLJIT_LIR_H_ */
|
#endif /* _SLJIT_LIR_H_ */
|
||||||
|
|
|
@ -2936,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
|
||||||
{
|
{
|
||||||
*(sljit_sw*)addr = new_constant;
|
*(sljit_sw*)addr = new_constant;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
|
||||||
|
{
|
||||||
|
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
|
||||||
|
if (cpu_has_sse2 == -1)
|
||||||
|
get_cpu_features();
|
||||||
|
return cpu_has_sse2;
|
||||||
|
#else
|
||||||
|
return 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
|
||||||
|
{
|
||||||
|
if (cpu_has_cmov == -1)
|
||||||
|
get_cpu_features();
|
||||||
|
return cpu_has_cmov;
|
||||||
|
}
|
||||||
|
|
||||||
|
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
|
||||||
|
sljit_si type,
|
||||||
|
sljit_si dst_reg,
|
||||||
|
sljit_si src, sljit_sw srcw)
|
||||||
|
{
|
||||||
|
sljit_ub* inst;
|
||||||
|
|
||||||
|
CHECK_ERROR();
|
||||||
|
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
|
||||||
|
CHECK_ARGUMENT(sljit_x86_is_cmov_available());
|
||||||
|
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
|
||||||
|
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
|
||||||
|
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
|
||||||
|
FUNCTION_CHECK_SRC(src, srcw);
|
||||||
|
#endif
|
||||||
|
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
|
||||||
|
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
|
||||||
|
fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
|
||||||
|
!(dst_reg & SLJIT_INT_OP) ? "" : ".i",
|
||||||
|
JUMP_PREFIX(type), jump_names[type & 0xff]);
|
||||||
|
sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
|
||||||
|
fprintf(compiler->verbose, ", ");
|
||||||
|
sljit_verbose_param(compiler, src, srcw);
|
||||||
|
fprintf(compiler->verbose, "\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADJUST_LOCAL_OFFSET(src, srcw);
|
||||||
|
CHECK_EXTRA_REGS(src, srcw, (void)0);
|
||||||
|
|
||||||
|
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
|
||||||
|
compiler->mode32 = dst_reg & SLJIT_INT_OP;
|
||||||
|
#endif
|
||||||
|
dst_reg &= ~SLJIT_INT_OP;
|
||||||
|
|
||||||
|
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
|
||||||
|
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
|
||||||
|
src = TMP_REG1;
|
||||||
|
srcw = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
|
||||||
|
FAIL_IF(!inst);
|
||||||
|
*inst++ = GROUP_0F;
|
||||||
|
*inst = get_jump_code(type & 0xff) - 0x40;
|
||||||
|
return SLJIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue