SSE2 refactor, JIT compiler update.

This commit is contained in:
Zoltán Herczeg 2015-08-30 05:30:43 +00:00
parent 170644eca3
commit ccda7d218f
3 changed files with 171 additions and 59 deletions

View File

@ -4146,12 +4146,20 @@ if (firstline)
{
SLJIT_ASSERT(common->first_line_end != 0);
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
JUMPHERE(quit);
OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
if (sljit_x86_is_cmov_available())
{
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
}
#endif
{
quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
JUMPHERE(quit);
}
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
@ -4163,49 +4171,55 @@ if (common->utf && offset > 0)
/* SSE2 accelerated first character search. */
if (sljit_is_fpu_available())
if (sljit_x86_is_sse2_available())
{
fast_forward_first_char2_sse2(common, char1, char2);
quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
if (firstline)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
else
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
if (offset > 0)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
if (common->mode == PCRE2_JIT_COMPLETE)
{
/* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
SLJIT_ASSERT(common->forced_quit_label == NULL);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
{
utf_quit = JUMP(SLJIT_JUMP);
if (common->utf && offset > 0)
{
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
JUMPHERE(quit);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif PCRE2_CODE_UNIT_WIDTH == 16
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPHERE(utf_quit);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
#endif
if (offset > 0)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
else if (sljit_x86_is_cmov_available())
{
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
}
else
#endif
{
quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
OP1(SLJIT_MOV, STR_PTR, 0, firstline ? SLJIT_MEM1(SLJIT_SP) : STR_END, firstline ? common->first_line_end : 0);
JUMPHERE(quit);
if (offset > 0)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
}
if (firstline)
OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
return;
}

View File

@ -869,34 +869,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler
sljit_si src1, sljit_sw src1w,
sljit_si src2, sljit_sw src2w);
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index ( >=0 ) of any SLJIT_R,
SLJIT_S and SLJIT_SP registers.
Note: it returns with -1 for virtual registers (only on x86-32). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index of any SLJIT_FLOAT register.
Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
/* Any instruction can be inserted into the instruction stream by
sljit_emit_op_custom. It has a similar purpose as inline assembly.
The size parameter must match to the instruction size of the target
architecture:
x86: 0 < size <= 15. The instruction argument can be byte aligned.
Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
if size == 4, the instruction argument must be 4 byte aligned.
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size);
/* Returns with non-zero if fpu is available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void);
@ -1214,4 +1186,64 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct
#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
/* --------------------------------------------------------------------- */
/* CPU specific functions */
/* --------------------------------------------------------------------- */
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index ( >=0 ) of any SLJIT_R,
SLJIT_S and SLJIT_SP registers.
Note: it returns with -1 for virtual registers (only on x86-32). */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg);
/* The following function is a helper function for sljit_emit_op_custom.
It returns with the real machine register index of any SLJIT_FLOAT register.
Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg);
/* Any instruction can be inserted into the instruction stream by
sljit_emit_op_custom. It has a similar purpose as inline assembly.
The size parameter must match to the instruction size of the target
architecture:
x86: 0 < size <= 15. The instruction argument can be byte aligned.
Thumb2: if size == 2, the instruction argument must be 2 byte aligned.
if size == 4, the instruction argument must be 4 byte aligned.
Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_si size);
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* Returns with non-zero if sse2 is available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void);
/* Returns with non-zero if cmov instruction is available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void);
/* Emit a conditional mov instruction on x86 CPUs. This instruction
moves src to destination, if the condition is satisfied. Unlike
other arithmetic instructions, destination must be a register.
Before such instructions are emitted, cmov support should be
checked by sljit_x86_is_cmov_available function.
type must be between SLJIT_EQUAL and SLJIT_S_ORDERED
dst_reg must be a valid register and it can be combined
with SLJIT_INT_OP to perform 32 bit arithmetic
Flags: I - (never set any flags)
*/
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
sljit_si type,
sljit_si dst_reg,
sljit_si src, sljit_sw srcw);
#endif
#endif /* _SLJIT_LIR_H_ */

View File

@ -2936,3 +2936,69 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
{
*(sljit_sw*)addr = new_constant;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
{
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
if (cpu_has_sse2 == -1)
get_cpu_features();
return cpu_has_sse2;
#else
return 1;
#endif
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
{
if (cpu_has_cmov == -1)
get_cpu_features();
return cpu_has_cmov;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
sljit_si type,
sljit_si dst_reg,
sljit_si src, sljit_sw srcw)
{
sljit_ub* inst;
CHECK_ERROR();
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(sljit_x86_is_cmov_available());
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
FUNCTION_CHECK_SRC(src, srcw);
#endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
!(dst_reg & SLJIT_INT_OP) ? "" : ".i",
JUMP_PREFIX(type), jump_names[type & 0xff]);
sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
fprintf(compiler->verbose, ", ");
sljit_verbose_param(compiler, src, srcw);
fprintf(compiler->verbose, "\n");
}
#endif
ADJUST_LOCAL_OFFSET(src, srcw);
CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
compiler->mode32 = dst_reg & SLJIT_INT_OP;
#endif
dst_reg &= ~SLJIT_INT_OP;
if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
src = TMP_REG1;
srcw = 0;
}
inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
FAIL_IF(!inst);
*inst++ = GROUP_0F;
*inst = get_jump_code(type & 0xff) - 0x40;
return SLJIT_SUCCESS;
}