From 28e73c60e30d7e85807ecb124fba17505db687cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= Date: Wed, 29 Nov 2017 13:30:31 +0000 Subject: [PATCH] JIT compiler update. --- src/pcre2_jit_compile.c | 19 +- src/pcre2_jit_match.c | 2 +- src/sljit/sljitConfig.h | 6 +- src/sljit/sljitConfigInternal.h | 39 +-- src/sljit/sljitLir.c | 263 ++++++++++++-- src/sljit/sljitLir.h | 169 ++++++--- src/sljit/sljitNativeARM_32.c | 522 +++++++++++++++++++++------- src/sljit/sljitNativeARM_64.c | 65 +++- src/sljit/sljitNativeARM_T2_32.c | 354 +++++++++++++++++-- src/sljit/sljitNativeMIPS_32.c | 229 ++++++++++++ src/sljit/sljitNativeMIPS_64.c | 129 +++++++ src/sljit/sljitNativeMIPS_common.c | 155 ++++----- src/sljit/sljitNativePPC_64.c | 55 +++ src/sljit/sljitNativePPC_common.c | 118 +++++-- src/sljit/sljitNativeSPARC_32.c | 119 +++++++ src/sljit/sljitNativeSPARC_common.c | 143 +++++--- src/sljit/sljitNativeX86_32.c | 403 +++++++++++++++++---- src/sljit/sljitNativeX86_64.c | 213 +++++++++--- src/sljit/sljitNativeX86_common.c | 159 +++++---- src/sljit/sljitUtils.c | 18 +- 20 files changed, 2545 insertions(+), 635 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index c7bf0b2..23fbe5b 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -228,7 +228,7 @@ enum control_types { type_then_trap = 1 }; -typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); +typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); /* The following structure is the key data type for the recursive code generator. It is allocated by compile_matchingpath, and contains @@ -2753,7 +2753,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base)); } -static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg) +static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg) { while (current != NULL) { @@ -6057,7 +6057,7 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); #if defined SUPPORT_UNICODE -static PCRE2_SPTR SLJIT_CALL do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1) +static PCRE2_SPTR SLJIT_FUNC do_utf_caselesscmp(PCRE2_SPTR src1, jit_arguments *args, PCRE2_SPTR end1) { /* This function would be ineffective to do in JIT level. 
*/ sljit_u32 c1, c2; @@ -7598,8 +7598,9 @@ if (common->utf && *cc == OP_REFI) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), STR_PTR, 0); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + if (common->mode == PCRE2_JIT_COMPLETE) add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); else @@ -7924,7 +7925,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static int SLJIT_CALL do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -8038,7 +8039,7 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); -sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, callout_arg_size); @@ -11283,7 +11284,7 @@ if (opcode == OP_SKIP_ARG) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); - sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); @@ -11957,7 +11958,7 @@ if (!compiler) common->compiler = compiler; /* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size); +sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); /* Register init. 
*/ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -12206,7 +12207,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); OP2(SLJIT_SUB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); -sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c index 2f840ab..1e3339e 100644 --- a/src/pcre2_jit_match.c +++ b/src/pcre2_jit_match.c @@ -118,7 +118,7 @@ if ((options & PCRE2_PARTIAL_HARD) != 0) else if ((options & PCRE2_PARTIAL_SOFT) != 0) index = 1; -if (functions->executable_funcs == NULL || functions->executable_funcs[index] == NULL) +if (functions == NULL || functions->executable_funcs[index] == NULL) return PCRE2_ERROR_JIT_BADOPTION; /* Sanity checks should be handled by pcre_exec. */ diff --git a/src/sljit/sljitConfig.h b/src/sljit/sljitConfig.h index b65584a..d54b5e6 100644 --- a/src/sljit/sljitConfig.h +++ b/src/sljit/sljitConfig.h @@ -108,8 +108,10 @@ /* Force cdecl calling convention even if a better calling convention (e.g. fastcall) is supported by the C compiler. - If this option is enabled, C functions without - SLJIT_CALL can also be called from JIT code. */ + If this option is disabled (this is the default), functions + called from JIT should be defined with the SLJIT_FUNC attribute. + Standard C functions can still be called by using the + SLJIT_CALL_CDECL jump type. */ #ifndef SLJIT_USE_CDECL_CALLING_CONVENTION /* Disabled by default */ #define SLJIT_USE_CDECL_CALLING_CONVENTION 0 diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index cc0810f..12fe2c2 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -60,11 +60,13 @@ a single precision floating point array by index SLJIT_F64_SHIFT : the shift required to apply when accessing a double precision floating point array by index + SLJIT_PREF_SHIFT_REG : x86 systems prefer ecx for shifting by register + the scratch register index of ecx is stored in this variable SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address Other macros: - SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT + SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) */ @@ -471,44 +473,44 @@ typedef double sljit_f64; /* Calling convention of functions generated by SLJIT or called from the generated code. */ /*****************************************************************************************/ -#ifndef SLJIT_CALL +#ifndef SLJIT_FUNC #if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) /* Force cdecl.
*/ -#define SLJIT_CALL +#define SLJIT_FUNC #elif (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if defined(__GNUC__) && !defined(__APPLE__) -#define SLJIT_CALL __attribute__ ((fastcall)) +#define SLJIT_FUNC __attribute__ ((fastcall)) #define SLJIT_X86_32_FASTCALL 1 #elif defined(_MSC_VER) -#define SLJIT_CALL __fastcall +#define SLJIT_FUNC __fastcall #define SLJIT_X86_32_FASTCALL 1 #elif defined(__BORLANDC__) -#define SLJIT_CALL __msfastcall +#define SLJIT_FUNC __msfastcall #define SLJIT_X86_32_FASTCALL 1 #else /* Unknown compiler. */ /* The cdecl attribute is the default. */ -#define SLJIT_CALL +#define SLJIT_FUNC #endif #else /* Non x86-32 architectures. */ -#define SLJIT_CALL +#define SLJIT_FUNC #endif /* SLJIT_CONFIG_X86_32 */ -#endif /* !SLJIT_CALL */ +#endif /* !SLJIT_FUNC */ #ifndef SLJIT_INDIRECT_CALL #if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \ @@ -557,24 +559,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) -#else -/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */ -#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) -#endif /* SLJIT_X86_32_FASTCALL */ +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#ifndef _WIN64 #define SLJIT_NUMBER_OF_REGISTERS 13 +#ifndef _WIN64 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 #define SLJIT_LOCALS_OFFSET_BASE 0 -#else -#define SLJIT_NUMBER_OF_REGISTERS 13 +#else /* _WIN64 */ #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) -#endif /* _WIN64 */ +#endif /* !_WIN64 */ +#define SLJIT_PREF_SHIFT_REG SLJIT_R3 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) @@ -622,8 +620,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_REGISTERS 18 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -/* Add +1 for double alignment. */ -#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw)) +/* saved registers (16), return struct pointer (1), space for 6 argument words (1), + 4th double arg (2), double alignment (1). */ +#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw)) #endif #elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c index c0bbb52..11a37a5 100644 --- a/src/sljit/sljitLir.c +++ b/src/sljit/sljitLir.c @@ -97,8 +97,13 @@ #define GET_ALL_FLAGS(op) \ ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define TYPE_CAST_NEEDED(op) \ + (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S32) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S32)) +#else #define TYPE_CAST_NEEDED(op) \ (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16)) +#endif #define BUF_SIZE 4096 @@ -118,6 +123,9 @@ /* When reg can be unused. */ #define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) +/* Mask for argument types. */ +#define SLJIT_DEF_MASK ((1 << SLJIT_DEF_SHIFT) - 1) + /* Jump flags. 
*/ #define JUMP_LABEL 0x1 #define JUMP_ADDR 0x2 @@ -591,6 +599,19 @@ static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) compiler->buf = prev; } +static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types) +{ + sljit_s32 arg_count = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + arg_count++; + arg_types >>= SLJIT_DEF_SHIFT; + } + + return arg_count; +} + static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) @@ -864,7 +885,11 @@ static char* jump_names[] = { (char*)"greater", (char*)"less_equal", (char*)"unordered", (char*)"ordered", (char*)"jump", (char*)"fast_call", - (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3" + (char*)"call", (char*)"call.cdecl" +}; + +static char* call_arg_names[] = { + (char*)"void", (char*)"sw", (char*)"uw", (char*)"s32", (char*)"u32", (char*)"f32", (char*)"f64" }; #endif /* SLJIT_VERBOSE */ @@ -897,53 +922,104 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_com } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 types, arg_count, curr_type; +#endif + SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); - CHECK_ARGUMENT(args >= 0 && args <= 3); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(args <= saveds); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + CHECK_ARGUMENT((arg_types & SLJIT_DEF_MASK) == 0); + + types = (arg_types >> SLJIT_DEF_SHIFT); + arg_count = 0; + while (types != 0 && arg_count < 3) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); + arg_count++; + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(arg_count <= saveds && types == 0); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", - args, scratches, saveds, fscratches, fsaveds, local_size); + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " enter options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? 
"f64_align" : ""); + + arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } + + fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } #endif CHECK_RETURN_OK; } static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 types, arg_count, curr_type; +#endif + + SLJIT_UNUSED_ARG(compiler); + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); - CHECK_ARGUMENT(args >= 0 && args <= 3); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); - CHECK_ARGUMENT(args <= saveds); CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + + types = (arg_types >> SLJIT_DEF_SHIFT); + arg_count = 0; + while (types != 0 && arg_count < 3) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type == SLJIT_ARG_TYPE_SW || curr_type == SLJIT_ARG_TYPE_UW); + arg_count++; + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(arg_count <= saveds && types == 0); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", - args, scratches, saveds, fscratches, fsaveds, local_size); + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " set_context options:%s args[", (options & SLJIT_F64_ALIGNMENT) ? 
"f64_align" : ""); + + arg_types >>= SLJIT_DEF_SHIFT; + while (arg_types) { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } + + fprintf(compiler->verbose, "] scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + scratches, saveds, fscratches, fsaveds, local_size); + } #endif CHECK_RETURN_OK; } @@ -1417,9 +1493,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); - CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches); if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) <= SLJIT_NOT_ZERO) @@ -1439,6 +1514,63 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 i, types, curr_type, scratches, fscratches; + + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + + types = arg_types; + scratches = 0; + fscratches = 0; + for (i = 0; i < 5; i++) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); + if (i > 0) { + if (curr_type == 0) { + break; + } + if (curr_type >= SLJIT_ARG_TYPE_F32) + fscratches++; + else + scratches++; + } else { + if (curr_type >= SLJIT_ARG_TYPE_F32) { + CHECK_ARGUMENT(compiler->fscratches > 0); + } else if (curr_type >= SLJIT_ARG_TYPE_SW) { + CHECK_ARGUMENT(compiler->scratches > 0); + } + } + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(compiler->scratches >= scratches); + CHECK_ARGUMENT(compiler->fscratches >= fscratches); + CHECK_ARGUMENT(types == 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "]\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1488,20 +1620,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile CHECK_RETURN_OK; } -static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 src, sljit_sw srcw) { -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->last_flags = 0; -#endif - if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; CHECK_RETURN_OK; } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); - CHECK_ARGUMENT(type <= SLJIT_CALL0 || (type - SLJIT_CALL0) <= compiler->scratches); + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_FAST_CALL); FUNCTION_CHECK_SRC(src, srcw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1514,6 +1642,66 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 i, types, curr_type, scratches, fscratches; + + CHECK_ARGUMENT(type == SLJIT_CALL || type == SLJIT_CALL_CDECL); + FUNCTION_CHECK_SRC(src, srcw); + + types = arg_types; + scratches = 0; + fscratches = 0; + for (i = 0; i < 5; i++) { + curr_type = (types & SLJIT_DEF_MASK); + CHECK_ARGUMENT(curr_type <= SLJIT_ARG_TYPE_F64); + if (i > 0) { + if (curr_type == 0) { + break; + } + if (curr_type >= SLJIT_ARG_TYPE_F32) + fscratches++; + else + scratches++; + } else { + if (curr_type >= SLJIT_ARG_TYPE_F32) { + CHECK_ARGUMENT(compiler->fscratches > 0); + } else if (curr_type >= SLJIT_ARG_TYPE_SW) { + CHECK_ARGUMENT(compiler->scratches > 0); + } + } + types >>= SLJIT_DEF_SHIFT; + } + CHECK_ARGUMENT(compiler->scratches >= scratches); + CHECK_ARGUMENT(compiler->fscratches >= fscratches); + CHECK_ARGUMENT(types == 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " i%s%s ret[%s", jump_names[type & 0xff], + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", call_arg_names[arg_types & SLJIT_DEF_MASK]); + + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) { + fprintf(compiler->verbose, "], args["); + do { + fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_DEF_MASK]); + arg_types >>= SLJIT_DEF_SHIFT; + if (arg_types) + fprintf(compiler->verbose, ","); + } while (arg_types); + } + fprintf(compiler->verbose, "], "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) @@ -1943,12 +2131,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(options); - SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(arg_types); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(fscratches); @@ -1959,12 +2147,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(options); - SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(arg_types); SLJIT_UNUSED_ARG(scratches); SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(fscratches); @@ -2109,6 +2297,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return NULL; } +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNREACHABLE(); + return NULL; +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -2161,6 +2359,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(arg_types); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index 470c84f..925178f 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -213,14 +213,6 @@ of sljitConfigInternal.h */ #define SLJIT_RETURN_REG SLJIT_R0 -/* x86 prefers specific registers for special purposes. In case of shift - by register it supports only SLJIT_R2 for shift argument - (which is the src2 argument of sljit_emit_op2). 
If another register is used, sljit must exchange data between registers which cause a minor slowdown. Other architectures has no such limitation. */ -#define SLJIT_PREF_SHIFT_REG SLJIT_R2 - /* --------------------------------------------------------------------- */ /* Floating point registers */ /* --------------------------------------------------------------------- */ @@ -257,6 +249,79 @@ of sljitConfigInternal.h */ /* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ #define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) +/* --------------------------------------------------------------------- */ +/* Argument type definitions */ +/* --------------------------------------------------------------------- */ + +/* Argument type definitions. + Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF_]RET macros. */ + +#define SLJIT_ARG_TYPE_VOID 0 +#define SLJIT_ARG_TYPE_SW 1 +#define SLJIT_ARG_TYPE_UW 2 +#define SLJIT_ARG_TYPE_S32 3 +#define SLJIT_ARG_TYPE_U32 4 +#define SLJIT_ARG_TYPE_F32 5 +#define SLJIT_ARG_TYPE_F64 6 + +/* The following argument type definitions are used by sljit_emit_enter, + sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. + The following return type definitions are used by sljit_emit_call + and sljit_emit_icall functions. + + When a function is called, the first integer argument must be placed + in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first + floating point argument must be placed in SLJIT_FR0, the second in + SLJIT_FR1, and so on. + + Example function definition: + sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a, + sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); + + Argument type definition: + SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32) + | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64) + | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_F32) + + Short form of argument type definition: + SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64) + | SLJIT_ARG3(U32) | SLJIT_ARG4(F32) + + Argument passing: + arg_a must be placed in SLJIT_R0 + arg_c must be placed in SLJIT_R1 + arg_b must be placed in SLJIT_FR0 + arg_d must be placed in SLJIT_FR1 + +Note: + The SLJIT_ARG_TYPE_VOID type is only supported by + SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the + default value when SLJIT_DEF_RET is not specified. */ +#define SLJIT_DEF_SHIFT 4 +#define SLJIT_DEF_RET(type) (type) +#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT) +#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT)) +#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT)) +#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT)) + +/* Short form of the macros above. + + For example the following definition: + SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32) + + can be shortened to: + SLJIT_RET(SW) | SLJIT_ARG1(F32) + +Note: + The VOID type is only supported by SLJIT_RET, and + VOID is also the default value when SLJIT_RET is + not specified.
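A minimal usage sketch, assuming a hypothetical SLJIT_FUNC callback named sum_cb (not part of the patch), of how a C signature maps onto these descriptor macros:

	static sljit_sw SLJIT_FUNC sum_cb(sljit_sw a, sljit_uw b)
	{
		return a + (sljit_sw)b;   /* hypothetical callback body */
	}

	/* Matching descriptor: return type first, then one entry per argument.
	   a is passed in SLJIT_R0, b in SLJIT_R1, and the result is returned in SLJIT_R0. */
	sljit_s32 sum_cb_args = SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW);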
*/ #define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type) +#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type) + /* --------------------------------------------------------------------- */ /* Main structures and functions */ /* --------------------------------------------------------------------- */ @@ -331,6 +396,7 @@ struct sljit_compiler { sljit_s32 args; sljit_s32 locals_offset; sljit_s32 saveds_offset; + sljit_s32 stack_tmp_size; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -505,8 +571,6 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler #define SLJIT_HAS_CLZ 3 /* [Emulated] Conditional move is supported. */ #define SLJIT_HAS_CMOV 4 -/* [Limitation] [Emulated] Shifting with register is limited to SLJIT_PREF_SHIFT_REG. */ -#define SLJIT_HAS_PREF_SHIFT_REG 5 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) /* [Not emulated] SSE2 support is available on x86. */ @@ -519,27 +583,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) error, they return with SLJIT_SUCCESS. */ /* - The executable code is a function call from the viewpoint of the C + The executable code is a function from the viewpoint of the C language. The function calls must obey to the ABI (Application Binary Interface) of the platform, which specify the purpose of - all machine registers and stack handling among other things. The + machine registers and stack handling among other things. The sljit_emit_enter function emits the necessary instructions for setting up a new context for the executable code and moves function arguments to the saved registers. Furthermore the options argument can be used to pass configuration options to the compiler. The available options are listed before sljit_emit_enter. - The number of sljit_sw arguments passed to the generated function - are specified in the "args" parameter. The number of arguments must - be less than or equal to 3. The first argument goes to SLJIT_S0, - the second goes to SLJIT_S1 and so on. The register set used by - the function must be declared as well. The number of scratch and - saved registers used by the function must be passed to sljit_emit_enter. - Only R registers between R0 and "scratches" argument can be used - later. E.g. if "scratches" is set to 2, the register set will be - limited to R0 and R1. The S registers and the floating point + The function argument list is the combination of SLJIT_ARGx + (SLJIT_DEF_ARG1) macros. Currently at most 3 SW / UW + (SLJIT_ARG_TYPE_SW / SLJIT_ARG_TYPE_UW) arguments are supported. + The first argument goes to SLJIT_S0, the second goes to SLJIT_S1 + and so on. The register set used by the function must be declared + as well. The number of scratch and saved registers used by the + function must be passed to sljit_emit_enter. Only R registers + between R0 and "scratches" argument can be used later. E.g. if + "scratches" is set to 2, the scratch register set will be limited + to SLJIT_R0 and SLJIT_R1. The S registers and the floating point registers ("fscratches" and "fsaveds") are specified in a similar - way. The sljit_emit_enter is also capable of allocating a stack + manner. The sljit_emit_enter is also capable of allocating a stack space for local variables.
The "local_size" argument contains the size in bytes of this local area and its staring address is stored in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and @@ -566,7 +631,7 @@ offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ #define SLJIT_MAX_LOCAL_SIZE 65536 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); /* The machine code has a context (which contains the local stack space size, @@ -580,7 +645,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi the previous context. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size); /* Return from machine code. The op argument can be SLJIT_UNUSED which means the @@ -1136,25 +1201,32 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* Unconditional jump types. */ #define SLJIT_JUMP 24 + /* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */ #define SLJIT_FAST_CALL 25 -#define SLJIT_CALL0 26 -#define SLJIT_CALL1 27 -#define SLJIT_CALL2 28 -#define SLJIT_CALL3 29 - -/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */ + /* Called function must be declared with the SLJIT_FUNC attribute. */ +#define SLJIT_CALL 26 + /* Called function must be decalred with cdecl attribute. + This is the default attribute for C functions. */ +#define SLJIT_CALL_CDECL 27 /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 /* Emit a jump instruction. The destination is not set, only the type of the jump. - type must be between SLJIT_EQUAL and SLJIT_CALL3 + type must be between SLJIT_EQUAL and SLJIT_FAST_CALL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP - Flags: does not modify flags for conditional and unconditional - jumps but destroy all flags for calls. */ + Flags: does not modify flags. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); +/* Emit a C compiler (ABI) compatible function call. + type must be SLJIT_CALL or SLJIT_CALL_CDECL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros + + Flags: destroy all flags. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types); + /* Basic arithmetic comparison. In most architectures it is implemented as an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting appropriate flags) followed by a sljit_emit_jump. However some @@ -1162,6 +1234,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile It is suggested to use this comparison form when appropriate. type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: may destroy flags. 
*/ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, @@ -1186,15 +1259,23 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl /* Set the destination address of the jump to this label. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); -/* Call function or jump anywhere. Both direct and indirect form - type must be between SLJIT_JUMP and SLJIT_CALL3 - Direct form: set src to SLJIT_IMM() and srcw to the address - Indirect form: any other valid addressing mode +/* Emit an indirect jump or fast call. Both direct and indirect form + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + type must be between SLJIT_JUMP and SLJIT_FAST_CALL - Flags: does not modify flags for unconditional jumps but - destroy all flags for calls. */ + Flags: does not modify flags. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); +/* Emit a C compiler (ABI) compatible function call. + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + type must be SLJIT_CALL or SLJIT_CALL_CDECL + arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros + + Flags: destroy all flags. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw); + /* Perform the operation using the conditional flags as the second argument. Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value represented by the type is 1, if the condition represented by the type @@ -1270,8 +1351,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void); #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) /* This global lock is useful to compile common functions. */ -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void); -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void); #endif #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) @@ -1312,8 +1393,8 @@ struct sljit_stack { Note: limit contains the starting stack size in bytes. Note: the top field is initialized to base. Note: see sljit_create_compiler for the explanation of allocator_data. */ -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data); -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data); /* Can be used to increase (allocate) or decrease (free) the memory area. Returns with a non-zero value if unsuccessful. If new_limit is greater than @@ -1321,7 +1402,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *st since the growth ratio can be added to the current limit, and sljit_stack_resize will do all the necessary checks. The fields of the stack are not changed if sljit_stack_resize fails. 
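A minimal sketch of the emitter side of the new call interface, assuming a compiler that declares at least two scratch registers and a SLJIT_FUNC function f taking two sljit_sw arguments and returning sljit_sw (this mirrors the sljit_emit_icall call sites in pcre2_jit_compile.c above):

	/* Load the integer arguments into SLJIT_R0 / SLJIT_R1, then call f directly. */
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 100);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 23);
	sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW),
		SLJIT_IMM, SLJIT_FUNC_OFFSET(f));
	/* The return value is left in SLJIT_RETURN_REG (an alias of SLJIT_R0). */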
*/ -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit); +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit); #endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c index 745da99..441d3fa 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/src/sljit/sljitNativeARM_32.c @@ -24,12 +24,18 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#ifdef __SOFTFP__ +#define ARM_ABI_INFO " ABI:softfp" +#else +#define ARM_ABI_INFO " ABI:hardfp" +#endif + SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return "ARMv7" SLJIT_CPUINFO; + return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO; #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - return "ARMv5" SLJIT_CPUINFO; + return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO; #else #error "Internal error: Unknown ARM architecture" #endif @@ -40,8 +46,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) /* In ARM instruction words. Cache lines are usually 32 byte aligned. */ @@ -55,7 +61,11 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15 + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 }; #define RM(rm) (reg_map[rm]) @@ -72,32 +82,31 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define CONDITIONAL 0xe0000000 #define PUSH_POOL 0xff000000 -/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). 
*/ -#define ADC_DP 0x5 -#define ADD_DP 0x4 -#define AND_DP 0x0 +#define ADC 0xe0a00000 +#define ADD 0xe0800000 +#define AND 0xe0000000 #define B 0xea000000 -#define BIC_DP 0xe +#define BIC 0xe1c00000 #define BL 0xeb000000 #define BLX 0xe12fff30 #define BX 0xe12fff10 #define CLZ 0xe16f0f10 -#define CMN_DP 0xb -#define CMP_DP 0xa +#define CMN 0xe1600000 +#define CMP 0xe1400000 #define BKPT 0xe1200070 -#define EOR_DP 0x1 -#define MOV_DP 0xd +#define EOR 0xe0200000 +#define MOV 0xe1a00000 #define MUL 0xe0000090 -#define MVN_DP 0xf +#define MVN 0xe1e00000 #define NOP 0xe1a00000 -#define ORR_DP 0xc +#define ORR 0xe1800000 #define PUSH 0xe92d0000 #define POP 0xe8bd0000 -#define RSB_DP 0x3 -#define RSC_DP 0x7 -#define SBC_DP 0x6 +#define RSB 0xe0600000 +#define RSC 0xe0e00000 +#define SBC 0xe0c00000 #define SMULL 0xe0c00090 -#define SUB_DP 0x2 +#define SUB 0xe0400000 #define UMULL 0xe0800090 #define VABS_F32 0xeeb00ac0 #define VADD_F32 0xee300a00 @@ -108,6 +117,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 +#define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -260,7 +270,9 @@ static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) { /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); - return push_inst(compiler, BLX | RM(TMP_REG2)); + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + + return push_inst(compiler, BLX | RM(TMP_REG1)); } static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) @@ -889,25 +901,21 @@ static const sljit_uw data_transfer_insts[16] = { #define TYPE2_TRANSFER_IMM(imm) \ (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) -/* Condition: AL. 
*/ -#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ - (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) - static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 size, i, tmp; + sljit_s32 args, size, i, tmp; sljit_uw push; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); /* Push saved registers, temporary registers stmdb sp!, {..., lr} */ @@ -929,25 +937,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (local_size > 0) FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + args = get_arg_count(arg_types); + if (args >= 1) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0) | RM(SLJIT_R0))); if (args >= 2) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S1) | RM(SLJIT_R1))); if (args >= 3) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S2) | RM(SLJIT_R2))); return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = ((size + local_size + 7) & ~7) - size; @@ -1009,12 +1019,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ \ if (compiler->shift_imm != 0) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ - dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \ + return push_inst(compiler, MOV | (flags & SET_FLAGS) | \ + RD(dst) | (compiler->shift_imm << 7) | (opcode << 5) | RM(src2)); \ + return push_inst(compiler, 
MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2)); \ } \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ - dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1))); + return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | \ + (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)); static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) @@ -1024,10 +1034,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if (dst != src2) { if (src2 & SRC2_IMM) { - return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, - dst, SLJIT_UNUSED, src2)); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); } - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2))); + return push_inst(compiler, MOV | RD(dst) | RM(src2)); } return SLJIT_SUCCESS; @@ -1037,17 +1046,16 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & MOVE_REG_CONV) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (op == SLJIT_MOV_U8) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2)))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst))); + return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff); + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)); #else return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); - return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, - dst, SLJIT_UNUSED, src2)); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); } return SLJIT_SUCCESS; @@ -1056,25 +1064,23 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if (flags & MOVE_REG_CONV) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2)))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst))); + FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2))); + return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)); #else return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); - return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, - dst, SLJIT_UNUSED, src2)); + return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2); } return SLJIT_SUCCESS; case SLJIT_NOT: if (src2 & SRC2_IMM) { - return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? 
MOV_DP : MVN_DP, flags & SET_FLAGS, - dst, SLJIT_UNUSED, src2)); + return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2); } - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); + return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2)); case SLJIT_CLZ: SLJIT_ASSERT(!(flags & INV_IMM)); @@ -1085,28 +1091,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMN_DP, SET_FLAGS, - SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS, - dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_ADDC: SLJIT_ASSERT(!(flags & INV_IMM)); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS, - dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, - SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS, - dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SUBC: SLJIT_ASSERT(!(flags & INV_IMM)); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS, - dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_MUL: SLJIT_ASSERT(!(flags & INV_IMM)); @@ -1118,19 +1120,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); /* cmp TMP_REG1, dst asr #31. */ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0)); + return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0); case SLJIT_AND: - return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS, - dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS) + | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_OR: SLJIT_ASSERT(!(flags & INV_IMM)); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? 
src2 : RM(src2))); + return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_XOR: SLJIT_ASSERT(!(flags & INV_IMM)); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SHL: EMIT_SHIFT_INS_AND_RETURN(0); @@ -1293,8 +1295,8 @@ static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sl return 0; } - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1))); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2))); + FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1)); + FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2)); return 1; } #endif @@ -1311,11 +1313,11 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, /* Create imm by 1 inst. */ tmp = get_imm(imm); if (tmp) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); + return push_inst(compiler, MOV | RD(reg) | tmp); tmp = get_imm(~imm); if (tmp) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); + return push_inst(compiler, MVN | RD(reg) | tmp); #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) /* Create imm by 2 inst. */ @@ -1365,7 +1367,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit if (argw != 0 && !is_type1_transfer) { SLJIT_ASSERT(!(flags & WRITE_BACK)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7)))); + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | (argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } @@ -1381,7 +1383,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit imm = get_imm(argw & ~0xfff); if (imm) { offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); + FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm)); argw = argw & 0xfff; arg = offset_reg; } @@ -1390,7 +1392,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit imm = get_imm(-argw & ~0xfff); if (imm) { offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); + FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm)); argw = -(-argw & 0xfff); arg = offset_reg; } @@ -1408,7 +1410,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit imm = get_imm(argw & ~0xff); if (imm) { offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); + FAIL_IF(push_inst(compiler, ADD | RD(offset_reg) | RN(arg) | imm)); argw = argw & 0xff; arg = offset_reg; } @@ -1417,7 +1419,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit imm = get_imm(-argw & ~0xff); if (imm) { offset_reg = (flags & WRITE_BACK) ? 
arg : tmp_reg; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); + FAIL_IF(push_inst(compiler, SUB | RD(offset_reg) | RN(arg) | imm)); argw = -(-argw & 0xff); arg = offset_reg; } @@ -1785,7 +1787,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg << 1; + return (freg_map[reg] << 1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1804,9 +1806,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ - ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs)) + ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg_map[freg] << 12) | (offs)) #define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ - ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16)) + ((opcode) | (mode) | (freg_map[dst] << 12) | freg_map[src1] | (freg_map[src2] << 16)) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { @@ -1817,7 +1819,7 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))); arg = TMP_REG2; argw = 0; } @@ -1831,20 +1833,20 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, imm = get_imm(argw & ~0x3fc); if (imm) { - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } imm = get_imm(-argw & ~0x3fc); if (imm) { argw = -argw; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm))); + FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm)); return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } } if (arg) { FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2)))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2))); } else FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); @@ -1866,7 +1868,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); if (FAST_IS_REG(dst)) - return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (freg_map[TMP_FREG1] << 16)); /* Store the integer value from a VFP register. 
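   The VCVT above has left the converted integer in TMP_FREG1, so emit_fop_mem
   can write it straight to the destination address.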
*/ return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); @@ -1881,14 +1883,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp op ^= SLJIT_F32_OP; if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); + FAIL_IF(push_inst(compiler, VMOV | RD(src) | (freg_map[TMP_FREG1] << 16))); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. */ FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); } else { FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (freg_map[TMP_FREG1] << 16))); } FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0))); @@ -2018,7 +2020,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil #undef FPU_LOAD #undef EMIT_FPU_DATA_TRANSFER -#undef EMIT_FPU_OPERATION /* --------------------------------------------------------------------- */ /* Other instructions */ @@ -2030,13 +2031,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - SLJIT_ASSERT(reg_map[TMP_REG1] == 14); + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); if (FAST_IS_REG(dst)) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1))); + return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2)); /* Memory. */ - return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2); + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) @@ -2045,16 +2046,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - SLJIT_ASSERT(reg_map[TMP_REG1] == 14); + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src)))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src))); else if (src & SLJIT_MEM) - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1)); else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); - return push_inst(compiler, BX | RM(TMP_REG1)); + return push_inst(compiler, BX | RM(TMP_REG2)); } /* --------------------------------------------------------------------- */ @@ -2111,7 +2112,7 @@ static sljit_uw get_cc(sljit_s32 type) return 0x70000000; default: - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); return 0xe0000000; } } @@ -2144,12 +2145,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - /* In ARM, we don't need to touch the arguments. */ + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, - type <= SLJIT_JUMP ? 
TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); if (jump->flags & SLJIT_REWRITABLE_JUMP) { jump->addr = compiler->size; @@ -2166,13 +2168,248 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type))); + PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); jump->addr = compiler->size; #endif return jump; } +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + + if (src && FAST_IS_REG(*src)) + src_offset = reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_offset > 16) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_offset - 16) + 0x7) & ~0x7))); + + /* Process arguments in reversed direction. 
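   The first 16 bytes of the computed layout correspond to r0-r3; anything at
   offset 16 or above is stored relative to SP in the area reserved right
   before this loop.  As a sketch, for an (f64, sw) argument list the double
   ends up in r0/r1 (offsets 0-7) and the machine word in r2 (offset 8).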
*/ + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT((stack_offset & 0x7) == 0); + + if (stack_offset < 16) { + if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + default: + arg_count--; + word_arg_offset -= sizeof(sljit_sw); + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT(stack_offset >= word_arg_offset); + + if (stack_offset != word_arg_offset) { + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = 1 + (stack_offset >> 2); + src_offset = stack_offset; + } + FAIL_IF(push_inst(compiler, MOV | (stack_offset << 10) | (word_arg_offset >> 2))); + } else + FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_DATA] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (stack_offset - 16))); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_size = 0; + + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12))); + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_size & 0x7) + stack_size += sizeof(sljit_sw); + stack_size += sizeof(sljit_f64); + break; + default: + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_size <= 16) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | (((stack_size - 16) + 0x7) & ~0x7)); +} + +#else /* !__SOFTFP__ */ + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 remap = 0; + sljit_u32 offset = 0; + sljit_u32 new_offset, mask; + + /* Remove return value. */ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { + new_offset = 0; + mask = 1; + + while (remap & mask) { + new_offset++; + mask <<= 1; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, + 0, (new_offset >> 1) + 1, (offset >> 1) + 1, 0) | ((new_offset & 0x1) ? 
0x400000 : 0))); + + offset += 2; + } + else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { + new_offset = 0; + mask = 3; + + while (remap & mask) { + new_offset += 2; + mask <<= 2; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, SLJIT_F32_OP, (new_offset >> 1) + 1, (offset >> 1) + 1, 0))); + + offset += 2; + } + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif /* __SOFTFP__ */ + +#undef EMIT_FPU_OPERATION + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; +#else /* !__SOFTFP__ */ + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +#endif /* __SOFTFP__ */ +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; @@ -2181,16 +2418,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - /* In ARM, we don't need to touch the arguments. */ + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + if (!(src & SLJIT_IMM)) { - if (FAST_IS_REG(src)) + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); + } SLJIT_ASSERT(src & SLJIT_MEM); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2)); - return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)); } + /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); @@ -2199,22 +2440,57 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) FAIL_IF(prepare_blx(compiler)); - FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); if (type >= SLJIT_FAST_CALL) FAIL_IF(emit_blx(compiler)); #else - FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); - FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RM(TMP_REG2))); + FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); #endif jump->addr = compiler->size; return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#ifdef __SOFTFP__ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return softfloat_post_call_with_args(compiler, arg_types); +#else /* !__SOFTFP__ */ + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +#endif /* __SOFTFP__ */ +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); + sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op); sljit_uw cc, ins; CHECK_ERROR(); @@ -2223,31 +2499,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co op = GET_OPCODE(op); cc = get_cc(type & 0xff); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0))); - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); + FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0)); + FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); if (dst & SLJIT_MEM) return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2); return SLJIT_SUCCESS; } - ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); + ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? 
ORR : EOR)); if (dst & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 1) & ~COND_MASK) | cc)); + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc)); if (op == SLJIT_AND) - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); if (dst & SLJIT_MEM) FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); if (flags & SLJIT_SET_Z) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_r))); + return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg)); return SLJIT_SUCCESS; } @@ -2267,11 +2543,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { tmp = get_imm(srcw); if (tmp) - return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc); + return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc); tmp = get_imm(~srcw); if (tmp) - return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MVN_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc); + return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc); #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) tmp = (sljit_uw) srcw; @@ -2285,7 +2561,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #endif } - return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, RM(src)) & ~COND_MASK) | cc); + return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c index fd67f50..8ceac4d 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/src/sljit/sljitNativeARM_64.c @@ -40,23 +40,27 @@ typedef sljit_u32 sljit_ins; #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 }; +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 +}; + #define W_OP (1 << 31) #define RD(rd) (reg_map[rd]) #define RT(rt) (reg_map[rt]) #define RN(rn) (reg_map[rn] << 5) #define RT2(rt2) (reg_map[rt2] << 10) #define RM(rm) (reg_map[rm] << 16) -#define VD(vd) (vd) -#define VT(vt) (vt) -#define VN(vn) ((vn) << 5) -#define VM(vm) ((vm) << 16) +#define VD(vd) (freg_map[vd]) +#define VT(vt) (freg_map[vt]) +#define VN(vn) (freg_map[vn] << 5) +#define VM(vm) (freg_map[vm] << 16) /* --------------------------------------------------------------------- */ /* Instrucion forms */ @@ -1088,14 +1092,14 @@ static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji /* --------------------------------------------------------------------- */ 
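/* Minimal usage sketch of the argument-type interface consumed below.  The
   entry and call functions now take a packed argument-type word instead of an
   argument count: the return type occupies the lowest field of SLJIT_DEF_SHIFT
   bits and each argument the next one, which is why the helpers strip the
   return type with arg_types >>= SLJIT_DEF_SHIFT.  Assuming the machine-word
   type constant is named SLJIT_ARG_TYPE_SW and that SLJIT_CALL and
   SLJIT_FUNC_OFFSET are available, a call to a hypothetical
   sljit_sw f(sljit_sw, sljit_f64) could be emitted as:

       sljit_s32 arg_types = SLJIT_ARG_TYPE_SW
           | (SLJIT_ARG_TYPE_SW  << SLJIT_DEF_SHIFT)
           | (SLJIT_ARG_TYPE_F64 << (2 * SLJIT_DEF_SHIFT));

       sljit_emit_icall(compiler, SLJIT_CALL, arg_types,
           SLJIT_IMM, SLJIT_FUNC_OFFSET(f));
*/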
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 i, tmp, offs, prev, saved_regs_size; + sljit_s32 args, i, tmp, offs, prev, saved_regs_size; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; @@ -1165,6 +1169,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); } + args = get_arg_count(arg_types); + if (args >= 1) FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); if (args >= 2) @@ -1176,12 +1182,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; local_size = (local_size + 15) & ~0xf; @@ -1568,7 +1574,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg; + return freg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1936,6 +1942,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { @@ -1978,7 +1998,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - /* In ARM, we don't need to touch the arguments. 
*/ if (!(src & SLJIT_IMM)) { if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw)); @@ -1987,6 +2006,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); } + /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); @@ -1997,6 +2017,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c index 29e5566..db0d8a1 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -26,7 +26,11 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { - return "ARM-Thumb2" SLJIT_CPUINFO; +#ifdef __SOFTFP__ + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp"; +#else + return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp"; +#endif } /* Length of an instruction word. */ @@ -37,12 +41,16 @@ typedef sljit_u32 sljit_ins; #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
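   With this update SLJIT_R0-SLJIT_R3 map directly to r0-r3, so call arguments
   can be assembled in place; r12 now serves as TMP_REG1 instead of r3.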
*/ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 14, 15 + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7 }; #define COPY_BITS(src, from, to, bits) \ @@ -69,9 +77,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define RN4(rn) (reg_map[rn] << 16) #define RM4(rm) (reg_map[rm]) #define RT4(rt) (reg_map[rt] << 12) -#define DD4(dd) ((dd) << 12) -#define DN4(dn) ((dn) << 16) -#define DM4(dm) (dm) +#define DD4(dd) (freg_map[dd] << 12) +#define DN4(dn) (freg_map[dn] << 16) +#define DM4(dm) (freg_map[dm]) #define IMM5(imm) \ (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) #define IMM12(imm) \ @@ -178,6 +186,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define VDIV_F32 0xee800a00 #define VMOV_F32 0xeeb00a40 #define VMOV 0xee000a10 +#define VMOV2 0xec400a10 #define VMRS 0xeef1fa10 #define VMUL_F32 0xee200a00 #define VNEG_F32 0xeeb10a40 @@ -208,10 +217,10 @@ static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst) static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm) { - FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | - COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); - return push_inst32(compiler, MOVT | RD4(dst) | - COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); } static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) @@ -522,13 +531,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, } /* set low 16 bits, set hi 16 bits to 0. */ - FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | - COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) + | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); /* set hi 16 bit if needed. 
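   That is, the MOVT below is emitted only when the constant has bits set above
   the low 16 bits.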
*/ if (imm >= 0x10000) - return push_inst32(compiler, MOVT | RD4(dst) | - COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return push_inst32(compiler, MOVT | RD4(dst) + | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); return SLJIT_SUCCESS; } @@ -1088,15 +1097,15 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit /* --------------------------------------------------------------------- */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 size, i, tmp; + sljit_s32 args, size, i, tmp; sljit_ins push = 0; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) @@ -1120,6 +1129,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); } + args = get_arg_count(arg_types); + if (args >= 1) FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); if (args >= 2) @@ -1131,14 +1142,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = ((size + local_size + 7) & ~7) - size; @@ -1219,11 +1230,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIV_UW: case SLJIT_DIV_SW: SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); - SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); saved_reg_count = 0; if (compiler->scratches >= 4) - saved_reg_list[saved_reg_count++] = 12; + saved_reg_list[saved_reg_count++] = 3; if (compiler->scratches >= 3) saved_reg_list[saved_reg_count++] = 2; if (op >= SLJIT_DIV_UW) @@ -1448,7 +1459,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg << 1; + return (freg_map[reg] << 1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1798,7 +1809,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - /* In ARM, we don't need to touch the arguments. */ PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); if (type < SLJIT_JUMP) { jump->flags |= IS_COND; @@ -1818,6 +1828,241 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } +#ifdef __SOFTFP__ + +static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_offset = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 src_offset = 4 * sizeof(sljit_sw); + sljit_u8 offsets[4]; + + if (src && FAST_IS_REG(*src)) + src_offset = reg_map[*src] * sizeof(sljit_sw); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_offset > 16) + FAIL_IF(push_inst16(compiler, SUB_SP | (((stack_offset - 16) + 0x7) & ~0x7) >> 2)); + + SLJIT_ASSERT(reg_map[TMP_REG1] == 12); + + /* Process arguments in reversed direction. 
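   If the register holding the indirect call target would be clobbered by an
   argument move, the target is first copied to TMP_REG1; the 16-bit MOV
   encoding used for that copy hard-codes r12 as its destination, which is why
   the assertion above requires reg_map[TMP_REG1] == 12.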
*/ + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (stack_offset << 10))); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + float_arg_count--; + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT((stack_offset & 0x7) == 0); + + if (stack_offset < 16) { + if (src_offset == stack_offset || src_offset == stack_offset + sizeof(sljit_sw)) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (stack_offset << 10) | ((stack_offset + sizeof(sljit_sw)) << 14) | float_arg_count)); + } else + FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((stack_offset - 16) >> 2))); + break; + default: + arg_count--; + word_arg_offset -= sizeof(sljit_sw); + stack_offset = offsets[arg_count]; + + SLJIT_ASSERT(stack_offset >= word_arg_offset); + + if (stack_offset != word_arg_offset) { + if (stack_offset < 16) { + if (src_offset == stack_offset) { + FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7))); + *src = TMP_REG1; + } + else if (src_offset == word_arg_offset) { + *src = 1 + (stack_offset >> 2); + src_offset = stack_offset; + } + FAIL_IF(push_inst16(compiler, MOV | (stack_offset >> 2) | (word_arg_offset << 1))); + } else + FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((stack_offset - 16) >> 2))); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_size = 0; + + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) + FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12))); + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) + FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0)); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_size & 0x7) + stack_size += sizeof(sljit_sw); + stack_size += sizeof(sljit_f64); + break; + default: + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (stack_size <= 16) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2)); +} + +#else + +static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_u32 remap = 0; + sljit_u32 offset = 0; + sljit_u32 new_offset, mask; + + /* Remove return value. */ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32) { + new_offset = 0; + mask = 1; + + while (remap & mask) { + new_offset++; + mask <<= 1; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | DD4((new_offset >> 1) + 1) + | ((new_offset & 0x1) ? 
0x400000 : 0) | DM4((offset >> 1) + 1))); + + offset += 2; + } + else if ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F64) { + new_offset = 0; + mask = 3; + + while (remap & mask) { + new_offset += 2; + mask <<= 2; + } + remap |= mask; + + if (offset != new_offset) + FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_F32_OP | DD4((new_offset >> 1) + 1) | DM4((offset >> 1) + 1))); + + offset += 2; + } + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ +#ifdef __SOFTFP__ + struct sljit_jump *jump; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#ifdef __SOFTFP__ + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; +#else + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +#endif +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump; @@ -1826,16 +2071,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - /* In ARM, we don't need to touch the arguments. */ + SLJIT_ASSERT(reg_map[TMP_REG1] != 14); + if (!(src & SLJIT_IMM)) { - if (FAST_IS_REG(src)) + if (FAST_IS_REG(src)) { + SLJIT_ASSERT(reg_map[src] != 14); return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); + } FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1)); if (type >= SLJIT_FAST_CALL) return push_inst16(compiler, BLX | RN3(TMP_REG1)); } + /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); @@ -1846,6 +2095,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst16(compiler, (type <= SLJIT_JUMP ? 
BX : BLX) | RN3(TMP_REG1)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#ifdef __SOFTFP__ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + } + + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + return softfloat_post_call_with_args(compiler, arg_types); +#else /* !__SOFTFP__ */ + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +#endif /* __SOFTFP__ */ +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) @@ -1896,8 +2180,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; /* The condition must always be set, even if the ORR/EORI is not executed above. */ - if (reg_map[dst_r] <= 7) - return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); } @@ -1924,8 +2206,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil if (tmp < 0x10000) { /* set low 16 bits, set hi 16 bits to 0. */ FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - return push_inst32(compiler, MOVW | RD4(dst_reg) | - COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); + return push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); } tmp = get_imm(srcw); @@ -1943,10 +2225,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); tmp = (sljit_uw) srcw; - FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) | - COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); - return push_inst32(compiler, MOVT | RD4(dst_reg) | - COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) + | COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst_reg) + | COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c index 62e1610..9f9e157 100644 --- a/src/sljit/sljitNativeMIPS_32.c +++ b/src/sljit/sljitNativeMIPS_32.c @@ -435,3 +435,232 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, 
inst + 2); } + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ + sljit_s32 stack_offset = 0; + sljit_s32 arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 arg_count_save, types_save; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + sljit_u8 offsets[4]; + + SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + offsets[arg_count] = (sljit_u8)stack_offset; + + if (word_arg_count == 0 && arg_count <= 1) + offsets[arg_count] = 254 + arg_count; + + stack_offset += sizeof(sljit_f32); + arg_count++; + float_arg_count++; + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + offsets[arg_count] = (sljit_u8)stack_offset; + + if (word_arg_count == 0 && arg_count <= 1) + offsets[arg_count] = 254 + arg_count; + + stack_offset += sizeof(sljit_f64); + arg_count++; + float_arg_count++; + break; + default: + offsets[arg_count] = (sljit_u8)stack_offset; + stack_offset += sizeof(sljit_sw); + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ + if (stack_offset > 16) + FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP))); + + types_save = types; + arg_count_save = arg_count; + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + if (offsets[arg_count] < 254) + ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + if (offsets[arg_count] < 254) + ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]); + float_arg_count--; + break; + default: + if (offsets[arg_count - 1] >= 16) + ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]); + else if (arg_count != word_arg_count) + ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2)); + else if (arg_count == 1) + ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); + + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + types = types_save; + arg_count = arg_count_save; + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + arg_count--; + if (offsets[arg_count] == 254) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + else if (offsets[arg_count] < 16) + ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); + break; + case SLJIT_ARG_TYPE_F64: + arg_count--; + if (offsets[arg_count] == 254) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + else if (offsets[arg_count] < 16) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]); + ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw)); + } + break; + default: + arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = 
ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types) +{ + sljit_s32 stack_offset = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_offset += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + if (stack_offset & 0x7) + stack_offset += sizeof(sljit_sw); + stack_offset += sizeof(sljit_f64); + break; + default: + stack_offset += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + /* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */ + if (stack_offset > 16) + return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_ins ins; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + } + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + /* Register input. 
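   The call target is already in PIC_ADDR_REG at this point; the pending
   argument move returned in ins (or a NOP) is emitted after the JALR so that
   it executes from the branch delay slot.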
*/ + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + return post_call_with_args(compiler, arg_types); +} diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c index dd114bb..ff6f048 100644 --- a/src/sljit/sljitNativeMIPS_64.c +++ b/src/sljit/sljitNativeMIPS_64.c @@ -537,3 +537,132 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); } + +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_ins prev_ins = NOP; + sljit_ins ins = NOP; + + SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12); + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + float_arg_count++; + break; + default: + arg_count++; + word_arg_count++; + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count); + else if (arg_count == 1) + ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count) + ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count); + else if (arg_count == 1) + ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3); + arg_count--; + word_arg_count--; + break; + } + + if (ins != NOP) { + if (prev_ins != NOP) + FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS)); + prev_ins = ins; + ins = NOP; + } + + types >>= SLJIT_DEF_SHIFT; + } + + *ins_ptr = prev_ins; + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_ins ins; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + + jump->flags |= IS_JAL | IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + + if (src & SLJIT_IMM) 
+ FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + else if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + } + + FAIL_IF(call_with_args(compiler, arg_types, &ins)); + + /* Register input. */ + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ins, UNMOVABLE_INS); +} diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c index 00e8303..588ca10 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/src/sljit/sljitNativeMIPS_common.c @@ -60,13 +60,27 @@ typedef sljit_u32 sljit_ins; #define EQUAL_FLAG 31 #define OTHER_FLAG 1 -#define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 14, 2, 4, 6, 8, 12, 10 +}; + +#else + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 13, 14, 15, 16, 17, 12, 18 +}; + +#endif + /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ @@ -74,21 +88,23 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define S(s) (reg_map[s] << 21) #define T(t) (reg_map[t] << 16) #define D(d) (reg_map[d] << 11) +#define FT(t) (freg_map[t] << 16) +#define FS(s) (freg_map[s] << 11) +#define FD(d) (freg_map[d] << 6) /* Absolute registers. 
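   These encode hardware register numbers directly, without going through
   reg_map; for example TA(0) selects $zero and DA(4) would select $a0, the
   first integer argument register.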
*/ #define SA(s) ((s) << 21) #define TA(t) ((t) << 16) #define DA(d) ((d) << 11) -#define FT(t) ((t) << 16) -#define FS(s) ((s) << 11) -#define FD(d) ((d) << 6) #define IMM(imm) ((imm) & 0xffff) #define SH_IMM(imm) ((imm) << 6) #define DR(dr) (reg_map[dr]) +#define FR(dr) (freg_map[dr]) #define HI(opcode) ((opcode) << 26) #define LO(opcode) (opcode) /* S = (16 << 21) D = (17 << 21) */ #define FMT_S (16 << 21) +#define FMT_D (17 << 21) #define ABS_S (HI(17) | FMT_S | LO(5)) #define ADD_S (HI(17) | FMT_S | LO(0)) @@ -153,6 +169,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define OR (HI(0) | LO(37)) #define ORI (HI(13)) #define SD (HI(63)) +#define SDC1 (HI(61)) #define SLT (HI(0) | LO(42)) #define SLTI (HI(10)) #define SLTIU (HI(11)) @@ -166,6 +183,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define SUB_S (HI(17) | FMT_S | LO(1)) #define SUBU (HI(0) | LO(35)) #define SW (HI(43)) +#define SWC1 (HI(57)) #define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #define XOR (HI(0) | LO(38)) #define XORI (HI(14)) @@ -564,6 +582,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define STACK_LOAD LD #endif +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw); + #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #include "sljitNativeMIPS_32.c" #else @@ -571,15 +591,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #endif SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_ins base; - sljit_s32 i, tmp, offs; + sljit_s32 args, i, tmp, offs; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -616,6 +636,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); } + args = get_arg_count(arg_types); + if (args >= 1) FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); if (args >= 2) @@ -627,12 +649,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, 
fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -1298,7 +1320,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg << 1; + return FR(reg); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -1328,11 +1350,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp #endif if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); src = TMP_FREG1; } - else - src <<= 1; FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); @@ -1340,7 +1360,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); /* Store the integer value from a VFP register. */ - return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # undef is_long @@ -1357,13 +1377,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; #endif - sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. */ - FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, ((flags) ? 
DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -1377,7 +1397,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); if (dst & SLJIT_MEM) - return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG1), dst, dstw, 0, 0); return SLJIT_SUCCESS; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -1392,18 +1412,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_ins inst; if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); src1 = TMP_FREG1; } - else - src1 <<= 1; if (src2 & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, 0, 0)); src2 = TMP_FREG2; } - else - src2 <<= 1; switch (GET_FLAG_TYPE(op)) { case SLJIT_EQUAL_F64: @@ -1443,14 +1459,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) op ^= SLJIT_F32_OP; - dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(dst_r), src, srcw, dst, dstw)); src = dst_r; } - else - src <<= 1; switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: @@ -1474,7 +1488,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil } if (dst & SLJIT_MEM) - return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return emit_op_mem2(compiler, FLOAT_DATA(op), FR(dst_r), dst, dstw, 0, 0); return SLJIT_SUCCESS; } @@ -1494,42 +1508,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; if (src1 & SLJIT_MEM) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w)) { FAIL_IF(compiler->error); src1 = TMP_FREG1; } else flags |= SLOW_SRC1; } - else - src1 <<= 1; if (src2 & SLJIT_MEM) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w)) { FAIL_IF(compiler->error); src2 = TMP_FREG2; } else flags |= SLOW_SRC2; } - else - src2 <<= 1; if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); } else { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); } } else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw)); else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw)); if (flags & SLOW_SRC1) src1 = TMP_FREG1; @@ -1555,7 +1565,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } if (dst_r == TMP_FREG2) - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), FR(TMP_FREG2), dst, dstw, 0, 0)); return SLJIT_SUCCESS; } @@ -1705,19 +1715,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - if (type <= SLJIT_JUMP) { + + if (type <= SLJIT_JUMP) PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - } else { - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - /* Cannot be optimized out if type is >= CALL0. */ - jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? IS_CALL : 0); + else { + jump->flags |= IS_JAL; PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - jump->addr = compiler->size; - /* A NOP if type < CALL1. 
*/ - PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS)); } + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); return jump; } @@ -1873,41 +1880,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { - sljit_s32 src_r = TMP_REG2; struct sljit_jump *jump = NULL; CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); - if (FAST_IS_REG(src)) { - if (DR(src) != 4) - src_r = src; - else - FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - } - - if (type >= SLJIT_CALL0) { - SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - if (src & (SLJIT_IMM | SLJIT_MEM)) { - if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); - else { - SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - } - FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - /* We need an extra instruction in any case. */ - return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS); - } - - /* Register input. */ - if (type >= SLJIT_CALL1) - FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4)); - FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); - return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); - } - if (src & SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); @@ -1918,11 +1896,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi jump->flags |= IS_MOVABLE; FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + src = TMP_REG2; + } + else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw)); + src = TMP_REG2; } - else if (src & SLJIT_MEM) - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); - FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); if (jump) jump->addr = compiler->size; FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c index 5366c30..706b2ba 100644 --- a/src/sljit/sljitNativePPC_64.c +++ b/src/sljit/sljitNativePPC_64.c @@ -413,6 +413,61 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 reg = 0; + + if (src) + reg = *src & REG_MASK; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count && arg_count == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(TMP_CALL_REG) | B(reg))); + *src = TMP_CALL_REG; + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + while (types) { + switch (types & 
SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count--; + break; + default: + if (arg_count != word_arg_count) + FAIL_IF(push_inst(compiler, OR | S(word_arg_count) | A(arg_count) | B(word_arg_count))); + + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw init_value) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c index 2bf855c..e5fd42e 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/src/sljit/sljitNativePPC_common.c @@ -102,13 +102,17 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_CALL_REG TMP_REG2 #endif -#define TMP_FREG1 (0) -#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 }; +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 1, 2, 3, 4, 5, 6, 0, 7 +}; + /* --------------------------------------------------------------------- */ /* Instrucion forms */ /* --------------------------------------------------------------------- */ @@ -117,11 +121,11 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { #define A(a) (reg_map[a] << 16) #define B(b) (reg_map[b] << 11) #define C(c) (reg_map[c] << 6) -#define FD(fd) ((fd) << 21) -#define FS(fs) ((fs) << 21) -#define FA(fa) ((fa) << 16) -#define FB(fb) ((fb) << 11) -#define FC(fc) ((fc) << 6) +#define FD(fd) (freg_map[fd] << 21) +#define FS(fs) (freg_map[fs] << 21) +#define FA(fa) (freg_map[fa] << 16) +#define FB(fb) (freg_map[fb] << 11) +#define FC(fc) (freg_map[fc] << 6) #define IMM(imm) ((imm) & 0xffff) #define CRD(d) ((d) << 21) @@ -610,14 +614,14 @@ ALT_FORM5 0x100000 */ #endif SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 i, tmp, offs; + sljit_s32 args, i, tmp, offs; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); FAIL_IF(push_inst(compiler, MFLR | D(0))); offs = -(sljit_s32)(sizeof(sljit_sw)); @@ -643,6 +647,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + + args = get_arg_count(arg_types); + if (args >= 1) FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); if (args >= 2) @@ -674,12 +681,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, 
sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; compiler->local_size = (local_size + 15) & ~0xf; @@ -1385,21 +1392,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + if (op < SLJIT_NOT && FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + if (op_flags & SLJIT_I32_OP) { if (op < SLJIT_NOT) { - if (FAST_IS_REG(src) && src == dst) { - if (!TYPE_CAST_NEEDED(op)) - return SLJIT_SUCCESS; - } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) - op = SLJIT_MOV_U32; - if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) - op = SLJIT_MOVU_U32; - if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) - op = SLJIT_MOV_S32; - if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) - op = SLJIT_MOVU_S32; + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + if (op == SLJIT_MOVU_S32) + op = SLJIT_MOVU_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + if (op == SLJIT_MOVU_U32) + op = SLJIT_MOVU_S32; + } #endif } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1746,7 +1758,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg; + return freg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2183,7 +2195,7 @@ static sljit_ins get_bo_bi_flags(sljit_s32 type) return (4 << 21) | ((4 + 3) << 16); default: - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); return (20 << 21); } } @@ -2209,7 +2221,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type < SLJIT_JUMP) jump->flags |= IS_COND; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL0) + if (type >= SLJIT_CALL) jump->flags |= IS_CALL; #endif @@ -2220,6 +2232,24 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump = NULL; @@ -2231,7 +2261,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (FAST_IS_REG(src)) { #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL0) { + if (type >= SLJIT_CALL) { FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); src_r = TMP_CALL_REG; } @@ -2241,12 +2271,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi src_r = src; #endif } else if (src & SLJIT_IMM) { + /* These jumps are converted to jump/call instructions when possible. */ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); jump->u.target = srcw; #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) - if (type >= SLJIT_CALL0) + if (type >= SLJIT_CALL) jump->flags |= IS_CALL; #endif FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); @@ -2263,6 +2294,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); + src = TMP_CALL_REG; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) diff --git a/src/sljit/sljitNativeSPARC_32.c b/src/sljit/sljitNativeSPARC_32.c index ee42130..0671b13 100644 --- a/src/sljit/sljitNativeSPARC_32.c +++ b/src/sljit/sljitNativeSPARC_32.c @@ -138,6 +138,125 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) +{ + sljit_s32 reg_index = 8; + sljit_s32 word_reg_index = 8; + sljit_s32 float_arg_index = 1; + sljit_s32 double_arg_count = 0; + sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw); + sljit_s32 types = 0; + sljit_s32 reg = 0; + sljit_s32 move_to_tmp2 = 0; + + if (src) + reg = reg_map[*src & REG_MASK]; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_index++; + if (reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + break; + case SLJIT_ARG_TYPE_F64: + float_arg_index++; + double_arg_count++; + if (reg_index == reg || reg_index + 1 == reg) + move_to_tmp2 = 1; + reg_index += 2; + break; + default: + if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg) + move_to_tmp2 = 1; + reg_index++; + word_reg_index++; + break; + } + + if (move_to_tmp2) { + move_to_tmp2 = 0; + if (reg < 
14) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); + *src = TMP_REG1; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + arg_types = types; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_index--; + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_index--; + if (float_arg_index == 4 && double_arg_count == 4) { + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS)); + } + else + FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); + float_offset -= sizeof(sljit_f64); + break; + default: + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + float_offset = (16 + 6) * sizeof(sljit_sw); + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + reg_index--; + if (reg_index < 14) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + float_offset -= sizeof(sljit_f64); + break; + case SLJIT_ARG_TYPE_F64: + reg_index -= 2; + if (reg_index < 14) { + if ((reg_index & 0x1) != 0) { + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + if (reg_index < 13) + FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1)); + } + else + FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); + } + float_offset -= sizeof(sljit_f64); + break; + default: + reg_index--; + word_reg_index--; + + if (reg_index != word_reg_index) { + if (reg_index < 14) + FAIL_IF(push_inst(compiler, OR | DA(reg_index) | S1(0) | S2A(word_reg_index), reg_index)); + else + FAIL_IF(push_inst(compiler, STW | DA(word_reg_index) | S1(SLJIT_SP) | IMM(92), word_reg_index)); + } + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c index 9831bd8..66e6c47 100644 --- a/src/sljit/sljitNativeSPARC_common.c +++ b/src/sljit/sljitNativeSPARC_common.c @@ -90,13 +90,19 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +/* This register is modified by calls, which affects the instruction + in the delay slot if it is used as a source register. 
*/ #define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) -#define TMP_FREG1 (0) -#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) +#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 + 0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15 +}; + +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { + 0, 0, 2, 4, 6, 8, 10, 12, 14 }; /* --------------------------------------------------------------------- */ @@ -104,10 +110,15 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { /* --------------------------------------------------------------------- */ #define D(d) (reg_map[d] << 25) +#define FD(d) (freg_map[d] << 25) +#define FDN(d) ((freg_map[d] | 0x1) << 25) #define DA(d) ((d) << 25) #define S1(s1) (reg_map[s1] << 14) -#define S2(s2) (reg_map[s2]) +#define FS1(s1) (freg_map[s1] << 14) #define S1A(s1) ((s1) << 14) +#define S2(s2) (reg_map[s2]) +#define FS2(s2) (freg_map[s2]) +#define FS2N(s2) (freg_map[s2] | 0x1) #define S2A(s2) (s2) #define IMM_ARG 0x2000 #define DOP(op) ((op) << 5) @@ -144,6 +155,8 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) #define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) #define JMPL (OPC1(0x2) | OPC3(0x38)) +#define LDD (OPC1(0x3) | OPC3(0x03)) +#define LDUW (OPC1(0x3) | OPC3(0x00)) #define NOP (OPC1(0x0) | OPC2(0x04)) #define OR (OPC1(0x2) | OPC3(0x02)) #define ORN (OPC1(0x2) | OPC3(0x06)) @@ -157,6 +170,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) #define SRL (OPC1(0x2) | OPC3(0x26)) #define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define STDF (OPC1(0x3) | OPC3(0x27)) +#define STF (OPC1(0x3) | OPC3(0x24)) +#define STW (OPC1(0x3) | OPC3(0x04)) #define SUB (OPC1(0x2) | OPC3(0x04)) #define SUBC (OPC1(0x2) | OPC3(0x0c)) #define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) @@ -455,12 +471,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #endif SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; compiler->local_size = local_size; @@ -479,12 +495,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - 
CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; return SLJIT_SUCCESS; @@ -553,7 +569,7 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag if (SLJIT_UNLIKELY(flags & ARG_TEST)) return 1; FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] - | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)) + | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)) | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); return -1; @@ -638,7 +654,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl } } - dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)); + dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)); delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS; if (!base) return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); @@ -962,7 +978,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return reg << 1; + return freg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -990,10 +1006,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); src = TMP_FREG1; } - else - src <<= 1; - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | FD(TMP_FREG1) | FS2(src), MOVABLE_INS)); if (FAST_IS_REG(dst)) { FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); @@ -1008,7 +1022,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; if (src & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -1027,7 +1041,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp } FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | FD(dst_r) | FS2(TMP_FREG1), MOVABLE_INS)); if (dst & SLJIT_MEM) return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); @@ -1042,17 +1056,13 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); src1 = TMP_FREG1; } - else - src1 <<= 1; if (src2 & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); src2 = TMP_FREG2; } - else - src2 <<= 1; - return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS); + return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | FS1(src1) | FS2(src2), FCC_IS_SET | MOVABLE_INS); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1071,39 +1081,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) op ^= SLJIT_F32_OP; - dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); src = dst_r; } - else - src <<= 1; switch (GET_OPCODE(op)) { case SLJIT_MOV_F64: if (src != dst_r) { if (dst_r != TMP_FREG1) { - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS)); if (!(op & SLJIT_F32_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); } else dst_r = src; } break; case SLJIT_NEG_F64: - FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS)); if (dst_r != src && !(op & SLJIT_F32_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_ABS_F64: - FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS)); if (dst_r != src && !(op & SLJIT_F32_OP)) - FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS)); op ^= SLJIT_F32_OP; break; } @@ -1129,7 +1137,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil compiler->cache_arg = 0; compiler->cache_argw = 0; - dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; if (src1 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { @@ -1138,8 +1146,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } else flags |= SLOW_SRC1; } - else - src1 <<= 1; if (src2 & SLJIT_MEM) { if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { @@ -1148,8 +1154,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil } else flags |= SLOW_SRC2; } - else - src2 <<= 1; if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { @@ -1173,19 +1177,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil switch (GET_OPCODE(op)) { case SLJIT_ADD_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); break; case SLJIT_SUB_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); break; case SLJIT_MUL_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); break; case SLJIT_DIV_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); break; } @@ -1339,21 +1343,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #else #error "Implementation required" #endif - } else { + } + else { if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; if (type >= SLJIT_FAST_CALL) jump->flags |= IS_CALL; } - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS)); + PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? 
TMP_LINK : 0) | S1(TMP_REG1) | IMM(0), UNMOVABLE_INS)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); return jump; } +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { struct sljit_jump *jump = NULL; @@ -1370,17 +1391,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF(!jump); set_jump(jump, compiler, JUMP_ADDR); jump->u.target = srcw; + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; if (type >= SLJIT_FAST_CALL) jump->flags |= IS_CALL; - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - src_r = TMP_REG2; + FAIL_IF(emit_const(compiler, TMP_REG1, 0)); + src_r = TMP_REG1; } else { - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); - src_r = TMP_REG2; + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src_r = TMP_REG1; } FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); @@ -1389,6 +1411,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, NOP, UNMOVABLE_INS); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + FAIL_IF(call_with_args(compiler, arg_types, &src)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 type) diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c index f5cf883..a6aa5c6 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/src/sljit/sljitNativeX86_32.c @@ -64,29 +64,28 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 size; + sljit_s32 args, size; sljit_u8 *inst; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + 
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + args = get_arg_count(arg_types); compiler->args = args; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - /* [esp+0] for saving temporaries and third argument for calls. */ - compiler->saveds_offset = 1 * sizeof(sljit_sw); -#else - /* [esp+0] for saving temporaries and space for maximum three arguments. */ - if (scratches <= 1) - compiler->saveds_offset = 1 * sizeof(sljit_sw); - else - compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw); + /* [esp+0] for saving temporaries and function calls. */ + compiler->stack_tmp_size = 2 * sizeof(sljit_sw); + +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (scratches > 3) + compiler->stack_tmp_size = 3 * sizeof(sljit_sw); #endif + compiler->saveds_offset = compiler->stack_tmp_size; if (scratches > 3) compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); @@ -178,10 +177,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi /* Space for a single argument. This amount is excluded when the stack is allocated below. */ local_size -= sizeof(sljit_sw); FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); - FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw))); #endif - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); + FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif @@ -192,12 +191,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); /* Some space might allocated during sljit_grow_stack() above on WIN32. 
*/ - FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); #if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->local_size > 1024) - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); #endif @@ -213,31 +212,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); } #endif - return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - compiler->args = args; + compiler->args = get_arg_count(arg_types); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - /* [esp+0] for saving temporaries and third argument for calls. */ - compiler->saveds_offset = 1 * sizeof(sljit_sw); -#else - /* [esp+0] for saving temporaries and space for maximum three arguments. */ - if (scratches <= 1) - compiler->saveds_offset = 1 * sizeof(sljit_sw); - else - compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw); + /* [esp+0] for saving temporaries and function calls. */ + compiler->stack_tmp_size = 2 * sizeof(sljit_sw); + +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (scratches > 3) + compiler->stack_tmp_size = 3 * sizeof(sljit_sw); #endif + compiler->saveds_offset = compiler->stack_tmp_size; if (scratches > 3) compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); @@ -278,10 +275,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp if (compiler->options & SLJIT_F64_ALIGNMENT) EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) else - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); #else - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); #endif @@ -418,7 +415,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) *inst = (flags & EX86_BYTE_ARG) ? 
GROUP_BINARY_83 : GROUP_BINARY_81; - if ((a & SLJIT_IMM) || (a == 0)) + if (a & SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_map[a] << 3; @@ -490,42 +487,324 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 /* Call / return instructions */ /* --------------------------------------------------------------------- */ -static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + +static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + if (word_arg_count > 2) + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + return stack_size; +} + +static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) { sljit_u8 *inst; + sljit_s32 float_arg_count; + + if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + PUSH_REG(reg_map[SLJIT_R2]); + } + else if (stack_size > 0) { + if (word_arg_count >= 4) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + stack_size = 0; + arg_types >>= SLJIT_DEF_SHIFT; + word_arg_count = 0; + float_arg_count = 0; + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + if (word_arg_count == 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); + stack_size += sizeof(sljit_sw); + } + else if (word_arg_count == 4) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); + stack_size += sizeof(sljit_sw); + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + } + + if (word_arg_count > 0) { + if (swap_args) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + + *inst++ = XCHG_EAX_r | reg_map[SLJIT_R2]; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; + } + } + + return SLJIT_SUCCESS; +} + +#endif + +static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count = 0; + + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + stack_size 
+= sizeof(sljit_f64); + break; + default: + word_arg_count++; + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + if (word_arg_count_ptr) + *word_arg_count_ptr = word_arg_count; + + if (stack_size <= compiler->stack_tmp_size) + return 0; + +#if defined(__APPLE__) + return ((stack_size - compiler->stack_tmp_size + 15) & ~15); +#else + return stack_size - compiler->stack_tmp_size; +#endif +} + +static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count) +{ + sljit_s32 float_arg_count = 0; + + if (word_arg_count >= 4) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw)); + + if (stack_size > 0) + FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + stack_size = 0; + word_arg_count = 0; + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f32); + break; + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); + stack_size += sizeof(sljit_f64); + break; + default: + word_arg_count++; + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0); + stack_size += sizeof(sljit_sw); + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; + } + + return SLJIT_SUCCESS; +} + +static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, + sljit_s32 arg_types, sljit_s32 stack_size) +{ + sljit_u8 *inst; + sljit_s32 single; + + if (stack_size > 0) + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + + if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + return SLJIT_SUCCESS; + + single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + inst[0] = single ? FSTPS : FSTPD; + inst[1] = (0x03 << 3) | 0x04; + inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; + + return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + struct sljit_jump *jump; + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - inst = (sljit_u8*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); - FAIL_IF(!inst); - INC_SIZE(type >= SLJIT_CALL3 ? 
2 + 1 : 2); + if ((type & 0xff) == SLJIT_CALL) { + stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); + PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); - if (type >= SLJIT_CALL3) - PUSH_REG(reg_map[SLJIT_R2]); - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; -#else - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); - FAIL_IF(!inst); - INC_SIZE(4 * (type - SLJIT_CALL0)); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; - *inst++ = 0; - if (type >= SLJIT_CALL2) { - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; - *inst++ = sizeof(sljit_sw); - } - if (type >= SLJIT_CALL3) { - *inst++ = MOV_rm_r; - *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */; - *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; - *inst++ = 2 * sizeof(sljit_sw); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0)); + return jump; } #endif - return SLJIT_SUCCESS; + + stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); + PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); + + PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size)); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 stack_size = 0; + sljit_s32 word_arg_count; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + sljit_s32 swap_args; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); + + if ((type & 0xff) == SLJIT_CALL) { + stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); + swap_args = 0; + + if (word_arg_count > 0) { + if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) { + swap_args = 1; + if (((src & REG_MASK) | 0x2) == SLJIT_R2) + src ^= 0x2; + if ((OFFS_REG(src) | 0x2) == SLJIT_R2) + src ^= TO_OFFS_REG(0x2); + } + } + + FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); + + compiler->saveds_offset += stack_size; + compiler->locals_offset += stack_size; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + compiler->saveds_offset -= stack_size; + compiler->locals_offset -= stack_size; + + return post_call_with_args(compiler, arg_types, 0); + } +#endif + + stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); + FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); + + compiler->saveds_offset += stack_size; + 
compiler->locals_offset += stack_size; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + + compiler->saveds_offset -= stack_size; + compiler->locals_offset -= stack_size; + + return post_call_with_args(compiler, arg_types, stack_size); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index 039b68c..866beb2 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -66,15 +66,15 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 i, tmp, size, saved_register_size; + sljit_s32 args, i, tmp, size, saved_register_size; sljit_u8 *inst; CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); #ifdef _WIN64 /* Two/four register slots for parameters plus space for xmm6 register if needed. */ @@ -108,6 +108,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi PUSH_REG(reg_lmap[i]); } + args = get_arg_count(arg_types); + if (args > 0) { size = args * 3; inst = (sljit_u8*)ensure_buf(compiler, 1 + size); @@ -182,7 +184,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); + FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif @@ -223,14 +225,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 saved_register_size; CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); #ifdef _WIN64 /* Two/four register slots for parameters plus space for xmm6 register if needed. 
*/ @@ -414,7 +416,11 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } } } - else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8) + else if (!(flags & EX86_SSE2_OP2)) { + if (reg_map[b] >= 8) + rex |= REX_B; + } + else if (freg_map[b] >= 8) rex |= REX_B; if (a & SLJIT_IMM) { @@ -441,7 +447,11 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ - if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8) + if (!(flags & EX86_SSE2_OP1)) { + if (reg_map[a] >= 8) + rex |= REX_R; + } + else if (freg_map[a] >= 8) rex |= REX_R; } @@ -468,12 +478,12 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - if ((a & SLJIT_IMM) || (a == 0)) + if (a & SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = reg_lmap[a] << 3; else - *buf_ptr = a << 3; + *buf_ptr = freg_lmap[a] << 3; } else { if (a & SLJIT_IMM) { @@ -487,7 +497,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } if (!(b & SLJIT_MEM)) - *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b); + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]); else if ((b & REG_MASK) != SLJIT_UNUSED) { if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { @@ -545,45 +555,161 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 /* Call / return instructions */ /* --------------------------------------------------------------------- */ -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) -{ - sljit_u8 *inst; - - /* After any change update IS_REG_CHANGED_BY_CALL as well. */ #ifndef _WIN64 - SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2); - inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!inst); - INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); - if (type >= SLJIT_CALL3) { - /* Move third argument to TMP_REG1. */ - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 word_arg_count = 0; + + SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2); + + compiler->mode32 = 0; + + /* Remove return value. 
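This hunk makes emit_x86_instruction() consult the new freg_map for SSE2 operands, so xmm8-xmm15 receive a REX.R or REX.B bit just as r8-r15 do for integer operands. The rule itself is architectural: any register whose hardware number is 8 or above carries its high bit in the REX prefix. A standalone sketch (REX_W/REX_R/REX_X values as defined in this file; REX_B = 0x41 is the standard fourth bit and is assumed here, as it is not visible in this hunk):

#include <stdio.h>

/* Each value already contains the 0x40 REX base. */
#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41

/* hw_* are hardware register numbers: rax=0 ... r15=15, xmm0=0 ... xmm15=15. */
static unsigned char rex_prefix(int is_64bit, int hw_reg, int hw_index, int hw_rm)
{
	unsigned char rex = 0;

	if (is_64bit)
		rex |= REX_W;
	if (hw_reg >= 8)
		rex |= REX_R;  /* extends the ModRM.reg field */
	if (hw_index >= 8)
		rex |= REX_X;  /* extends the SIB.index field */
	if (hw_rm >= 8)
		rex |= REX_B;  /* extends ModRM.rm / SIB.base */

	return rex;  /* 0 means no REX prefix is needed */
}

int main(void)
{
	printf("mov r9, rdx      -> rex %#x\n", (unsigned)rex_prefix(1, 9, 0, 2));  /* 0x4c */
	printf("movsd xmm1, xmm9 -> rex %#x\n", (unsigned)rex_prefix(0, 1, 0, 9));  /* 0x41 */
	return 0;
}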
*/ + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32) + word_arg_count++; + arg_types >>= SLJIT_DEF_SHIFT; } - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; + + if (word_arg_count == 0) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + *src_ptr = TMP_REG2; + } + else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2) + *src_ptr = TMP_REG1; + + if (word_arg_count >= 3) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0); + return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0); +} + #else - SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8); - inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); - FAIL_IF(!inst); - INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); - if (type >= SLJIT_CALL3) { - /* Move third argument to TMP_REG1. */ - *inst++ = REX_W | REX_R; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw) +{ + sljit_s32 src = src_ptr ? (*src_ptr) : 0; + sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; + sljit_s32 float_arg_count = 0; + sljit_s32 types = 0; + sljit_s32 data_trandfer = 0; + static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 }; + + SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9); + + compiler->mode32 = 0; + arg_types >>= SLJIT_DEF_SHIFT; + + while (arg_types) { + types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK); + + switch (arg_types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + case SLJIT_ARG_TYPE_F64: + arg_count++; + float_arg_count++; + + if (arg_count != float_arg_count) + data_trandfer = 1; + break; + default: + arg_count++; + word_arg_count++; + + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) { + data_trandfer = 1; + + if (src == word_arg_regs[arg_count]) { + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + *src_ptr = TMP_REG2; + } + } + break; + } + + arg_types >>= SLJIT_DEF_SHIFT; } - *inst++ = REX_W; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0]; -#endif + + if (!data_trandfer) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + *src_ptr = TMP_REG2; + } + + while (types) { + switch (types & SLJIT_DEF_MASK) { + case SLJIT_ARG_TYPE_F32: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + case SLJIT_ARG_TYPE_F64: + if (arg_count != float_arg_count) + FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0)); + arg_count--; + float_arg_count--; + break; + default: + if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) + EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0); + arg_count--; + word_arg_count--; + break; + } + + types >>= SLJIT_DEF_SHIFT; + } + return SLJIT_SUCCESS; } +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types) +{ + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); + + 
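The register shuffling in the two call_with_args() variants above follows from the ABIs they target: sljit's scratch registers do not line up one-to-one with the hardware argument slots, so values sometimes have to move through TMP_REG1/TMP_REG2 first. For reference, a standalone sketch of the integer argument registers of the System V and Windows x64 conventions (plain ABI facts, not sljit code):

#include <stdio.h>

/* Integer argument registers, in order, for the two x86-64 ABIs. */
static const char *sysv_int_args[6]  = { "rdi", "rsi", "rdx", "rcx", "r8", "r9" };
static const char *win64_int_args[4] = { "rcx", "rdx", "r8", "r9" };

int main(void)
{
	int i;

	/* System V: up to six integer/pointer arguments in registers,
	   floats separately in xmm0-xmm7. */
	for (i = 0; i < 6; i++)
		printf("SysV  arg%d -> %s\n", i + 1, sysv_int_args[i]);

	/* Win64: four positional slots shared by integer and float arguments;
	   argument N goes in slot N, either rcx/rdx/r8/r9 or xmm0-xmm3. */
	for (i = 0; i < 4; i++)
		printf("Win64 arg%d -> %s or xmm%d\n", i + 1, win64_int_args[i], i);

	return 0;
}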
PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_jump(compiler, type); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + + FAIL_IF(call_with_args(compiler, arg_types, &src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + + return sljit_emit_ijump(compiler, type, src, srcw); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { sljit_u8 *inst; @@ -679,7 +805,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler return SLJIT_SUCCESS; } - /* --------------------------------------------------------------------- */ /* Extend input */ /* --------------------------------------------------------------------- */ diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index eb0886d..1ca8c43 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -26,7 +26,11 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + return "x86" SLJIT_CPUINFO " ABI:fastcall"; +#else return "x86" SLJIT_CPUINFO; +#endif } /* @@ -92,23 +96,32 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { #ifndef _WIN64 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 + 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 }; /* low-map. reg_map & 0x7. */ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 + 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 }; #else /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9 + 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10 }; /* low-map. reg_map & 0x7. */ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { - 0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1 + 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2 }; #endif +/* Args: xmm0-xmm3 */ +static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 4, 0, 1, 2, 3, 5, 6 +}; +/* low-map. freg_map & 0x7. 
*/ +static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { + 4, 0, 1, 2, 3, 5, 6 +}; + #define REX_W 0x48 #define REX_R 0x44 #define REX_X 0x42 @@ -178,6 +191,8 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { #define CVTTSD2SI_r_xm 0x2c #define DIV (/* GROUP_F7 */ 6 << 3) #define DIVSD_x_xm 0x5e +#define FSTPS 0xd9 +#define FSTPD 0xdd #define INT3 0xcc #define IDIV (/* GROUP_F7 */ 7 << 3) #define IMUL (/* GROUP_F7 */ 5 << 3) @@ -613,9 +628,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) get_cpu_features(); return cpu_has_cmov; - case SLJIT_HAS_PREF_SHIFT_REG: - return 1; - case SLJIT_HAS_SSE2: #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) if (cpu_has_sse2 == -1) @@ -634,14 +646,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) /* Operators */ /* --------------------------------------------------------------------- */ +#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) + static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); @@ -653,10 +667,16 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); +static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src); + +static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, + sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); + #ifdef _WIN32 #include -static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) +static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size) { /* Workaround for calling the internal _chkstk() function on Windows. 
This function touches all 4k pages belongs to the requested stack space, @@ -1275,20 +1295,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile compiler->mode32 = 0; #endif + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } + if (op_flags & SLJIT_I32_OP) { - if (FAST_IS_REG(src) && src == dst) { - if (!TYPE_CAST_NEEDED(op)) - return SLJIT_SUCCESS; - } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM)) - op = SLJIT_MOV_U32; - if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM)) - op = SLJIT_MOVU_U32; - if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM)) - op = SLJIT_MOV_S32; - if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM)) - op = SLJIT_MOVU_S32; + if (src & SLJIT_MEM) { + if (op == SLJIT_MOV_S32) + op = SLJIT_MOV_U32; + if (op == SLJIT_MOVU_S32) + op = SLJIT_MOVU_U32; + } + else if (src & SLJIT_IMM) { + if (op == SLJIT_MOV_U32) + op = SLJIT_MOV_S32; + if (op == SLJIT_MOVU_U32) + op = SLJIT_MOVU_S32; + } #endif } @@ -1372,22 +1397,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) { if ((src & OFFS_REG_MASK) != 0) { - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0)); } else if (srcw != 0) { - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw)); } } if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) { if ((dst & OFFS_REG_MASK) != 0) { - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0)); } else if (dstw != 0) { - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw)); } } @@ -1445,12 +1470,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #endif static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; + sljit_u8 op_eax_imm = (op_types >> 24); + sljit_u8 op_rm = (op_types >> 16) & 0xff; + sljit_u8 op_mr = (op_types >> 8) & 0xff; + sljit_u8 op_imm = op_types & 0xff; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); @@ -1561,12 +1590,16 @@ static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, } static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm, + sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { sljit_u8* inst; + sljit_u8 op_eax_imm = (op_types >> 24); + sljit_u8 op_rm = (op_types >> 16) & 0xff; + sljit_u8 op_mr = (op_types >> 8) & 0xff; + sljit_u8 op_imm = op_types & 0xff; if (dst == SLJIT_UNUSED) { EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); @@ -2044,7 +2077,7 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, *inst |= mode; EMIT_MOV(compiler, 
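BINARY_OPCODE(op) packs the four encodings of one ALU operation into a single 32-bit value, and emit_cum_binary()/emit_non_cum_binary() unpack it by shifting, as shown above. A standalone sketch using the ADD opcodes; the byte values here are the standard x86 encodings (the real ADD_* defines live earlier in this file and are not shown in this hunk):

#include <stdio.h>

/* Standard x86 encodings for ADD; the /digit value is stored pre-shifted
   into the ModRM.reg position. */
#define ADD_EAX_i32 0x05
#define ADD_r_rm    0x03
#define ADD_rm_r    0x01
#define ADD         (0 << 3)

#define BINARY_OPCODE(op) \
	((unsigned)(op ## _EAX_i32) << 24 | (unsigned)(op ## _r_rm) << 16 | \
	 (unsigned)(op ## _rm_r) << 8 | (unsigned)(op))

int main(void)
{
	unsigned op_types = BINARY_OPCODE(ADD);

	/* Unpacking, as done at the top of emit_cum_binary/emit_non_cum_binary. */
	unsigned char op_eax_imm = (unsigned char)(op_types >> 24);
	unsigned char op_rm = (op_types >> 16) & 0xff;
	unsigned char op_mr = (op_types >> 8) & 0xff;
	unsigned char op_imm = op_types & 0xff;

	printf("eax,imm32=%02x  r,rm=%02x  rm,r=%02x  /digit<<3=%02x\n",
		op_eax_imm, op_rm, op_mr, op_imm);
	return 0;
}

Packing the four bytes into one argument trims the emitters' parameter lists from four opcodes to one word without changing which bytes are emitted.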
SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); } - else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { if (src1 != dst) EMIT_MOV(compiler, dst, 0, src1, src1w); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); @@ -2057,27 +2090,24 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, else { /* This case is complex since ecx itself may be used for addressing, and this case must be supported as well. */ -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); - EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); #else - EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); - EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); *inst |= mode; EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); - EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); #endif + if (dst != SLJIT_UNUSED) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } return SLJIT_SUCCESS; @@ -2101,7 +2131,7 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, if (!set_flags) return emit_mov(compiler, dst, dstw, src1, src1w); /* OR dst, src, 0 */ - return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(OR), dst, dstw, src1, src1w, SLJIT_IMM, 0); } @@ -2111,10 +2141,10 @@ static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler, if (!FAST_IS_REG(dst)) FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); - FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); + FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w)); if (FAST_IS_REG(dst)) - return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); + return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? 
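The rewritten emit_shift() goes to some trouble to route the shift count through SLJIT_PREF_SHIFT_REG because x86 variable shifts accept their count only in CL; when ECX is live, or even used for addressing, its old value has to be parked in TMP_REG2 (x86-64) or a stack slot (x86-32) and restored afterwards. A standalone sketch of the underlying constraint, encoding "shift r32, cl" with the standard D3 /digit forms (helper names are my own):

#include <stdio.h>

/* /digit values of the D3 shift group. */
#define SHL_DIGIT 4
#define SHR_DIGIT 5
#define SAR_DIGIT 7

/* Encode "shl/shr/sar r32, cl" for a hardware register number 0-7. */
static void encode_shift_cl(int digit, int hw_reg, unsigned char out[2])
{
	out[0] = 0xd3;  /* shift r/m32 by CL */
	out[1] = (unsigned char)(0xc0 | (digit << 3) | hw_reg);  /* ModRM, mod=11 */
}

int main(void)
{
	unsigned char buf[2];

	encode_shift_cl(SHL_DIGIT, 2 /* edx */, buf);
	printf("shl edx, cl: %02x %02x\n", buf[0], buf[1]);  /* d3 e2 */
	encode_shift_cl(SAR_DIGIT, 0 /* eax */, buf);
	printf("sar eax, cl: %02x %02x\n", buf[0], buf[1]);  /* d3 f8 */
	return 0;
}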
TMP_REG1 : dst, dstw, SLJIT_IMM, 0); return SLJIT_SUCCESS; } @@ -2145,10 +2175,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; } - return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(ADD), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(ADC), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: if (!HAS_FLAGS(op)) { @@ -2158,23 +2188,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (dst == SLJIT_UNUSED) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); - return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, + return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB), dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: if (dst == SLJIT_UNUSED) return emit_test_binary(compiler, src1, src1w, src2, src2w); - return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(AND), dst, dstw, src1, src1w, src2, src2w); case SLJIT_OR: - return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(OR), dst, dstw, src1, src1w, src2, src2w); case SLJIT_XOR: - return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, + return emit_cum_binary(compiler, BINARY_OPCODE(XOR), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), @@ -2203,7 +2233,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) return reg; +#else + return freg_map[reg]; +#endif } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, @@ -2345,6 +2379,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); src1 = TMP_FREG; } + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w); } @@ -2516,9 +2551,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); type &= 0xff; - if (type >= SLJIT_CALL1) - PTR_FAIL_IF(call_with_args(compiler, type)); - /* Worst case size. */ #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->size += (type >= SLJIT_JUMP) ? 
5 : 6; @@ -2534,14 +2566,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile return jump; } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -#ifndef _WIN64 -#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3) -#else -#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2) -#endif -#endif - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { sljit_u8 *inst; @@ -2553,25 +2577,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_EXTRA_REGS(src, srcw, (void)0); - if (type >= SLJIT_CALL1) { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (src == SLJIT_R2) { - EMIT_MOV(compiler, TMP_REG1, 0, src, 0); - src = TMP_REG1; - } - if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) - srcw += sizeof(sljit_sw); -#endif -#else - if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) { - EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); - src = TMP_REG2; - } -#endif - FAIL_IF(call_with_args(compiler, type)); - } - if (src == SLJIT_IMM) { jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); FAIL_IF_NULL(jump); diff --git a/src/sljit/sljitUtils.c b/src/sljit/sljitUtils.c index 9029db2..2fc274a 100644 --- a/src/sljit/sljitUtils.c +++ b/src/sljit/sljitUtils.c @@ -48,12 +48,12 @@ static SLJIT_INLINE void allocator_release_lock(void) #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void) { /* Always successful. */ } -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void) { /* Always successful. */ } @@ -88,7 +88,7 @@ static SLJIT_INLINE void allocator_release_lock(void) static HANDLE global_mutex = 0; -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void) { /* No idea what to do if an error occures. Static mutexes should never fail... */ if (!global_mutex) @@ -97,7 +97,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) WaitForSingleObject(global_mutex, INFINITE); } -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void) { ReleaseMutex(global_mutex); } @@ -130,12 +130,12 @@ static SLJIT_INLINE void allocator_release_lock(void) static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_grab_lock(void) { pthread_mutex_lock(&global_mutex); } -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void) { pthread_mutex_unlock(&global_mutex); } @@ -203,7 +203,7 @@ static SLJIT_INLINE sljit_s32 open_dev_zero(void) /* Planning to make it even more clever in the future. 
*/ static sljit_sw sljit_page_align = 0; -SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) { struct sljit_stack *stack; void *ptr; @@ -276,7 +276,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj #undef PAGE_ALIGN -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data) { SLJIT_UNUSED_ARG(allocator_data); #ifdef _WIN32 @@ -287,7 +287,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *st SLJIT_FREE(stack, allocator_data); } -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_FUNC sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit) { sljit_uw aligned_old_limit; sljit_uw aligned_new_limit;