From e612e06b5d5d98ad1604a088c223174b3db251c1 Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Mon, 11 Apr 2022 05:19:52 +0000 Subject: [PATCH] JIT compiler update --- src/pcre2_jit_compile.c | 2 +- src/sljit/sljitConfigInternal.h | 68 ++-- src/sljit/sljitLir.c | 88 +++-- src/sljit/sljitLir.h | 89 +++-- src/sljit/sljitNativeARM_32.c | 98 ++++-- src/sljit/sljitNativeARM_64.c | 41 ++- src/sljit/sljitNativeARM_T2_32.c | 69 ++-- src/sljit/sljitNativeMIPS_common.c | 49 ++- src/sljit/sljitNativePPC_32.c | 19 +- src/sljit/sljitNativePPC_64.c | 42 +-- src/sljit/sljitNativePPC_common.c | 116 ++++--- src/sljit/sljitNativeS390X.c | 98 +++--- src/sljit/sljitNativeSPARC_32.c | 6 +- src/sljit/sljitNativeSPARC_common.c | 35 +- src/sljit/sljitNativeX86_32.c | 486 +++++++++++++--------------- src/sljit/sljitNativeX86_64.c | 34 +- src/sljit/sljitNativeX86_common.c | 84 +++-- 17 files changed, 811 insertions(+), 613 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 9193a2c..e47a9f3 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -6597,7 +6597,7 @@ jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index 67f0531..55e4e39 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -156,7 +156,7 @@ extern "C" { #define SLJIT_CONFIG_MIPS_32 1 #elif defined(__mips64) #define SLJIT_CONFIG_MIPS_64 1 -#elif defined(__sparc__) || defined(__sparc) +#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64) #define SLJIT_CONFIG_SPARC_32 1 #elif defined(__s390x__) #define SLJIT_CONFIG_S390X 1 @@ -274,9 +274,13 @@ extern "C" { #ifndef SLJIT_INLINE /* Inline functions. Some old compilers do not support them. */ -#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#ifdef __SUNPRO_C +#if __SUNPRO_C < 0x560 #define SLJIT_INLINE #else +#define SLJIT_INLINE inline +#endif /* __SUNPRO_C */ +#else #define SLJIT_INLINE __inline #endif #endif /* !SLJIT_INLINE */ @@ -319,18 +323,36 @@ extern "C" { /* Instruction cache flush. */ /****************************/ +/* + * TODO: + * + * clang >= 15 could be safe to enable below + * older versions are known to abort in some targets + * https://github.com/PhilipHazel/pcre2/issues/92 + * + * beware APPLE is known to have removed the code in iOS so + * it will need to be excempted or result in broken builds + */ #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) -#if __has_builtin(__builtin___clear_cache) +#if __has_builtin(__builtin___clear_cache) && !defined(__clang__) +/* + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 + * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 + * gcc's clear_cache builtin for power and sparc are broken + */ +#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32) #define SLJIT_CACHE_FLUSH(from, to) \ __builtin___clear_cache((char*)(from), (char*)(to)) +#endif -#endif /* __has_builtin(__builtin___clear_cache) */ +#endif /* gcc >= 10 */ #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ #ifndef SLJIT_CACHE_FLUSH -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) /* Not required to implement on archs with unified caches. */ #define SLJIT_CACHE_FLUSH(from, to) @@ -340,9 +362,9 @@ extern "C" { /* Supported by all macs since Mac OS 10.5. However, it does not work on non-jailbroken iOS devices, although the compilation is successful. */ - +#include #define SLJIT_CACHE_FLUSH(from, to) \ - sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from))) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) @@ -351,18 +373,6 @@ extern "C" { ppc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) - -#define SLJIT_CACHE_FLUSH(from, to) \ - __builtin___clear_cache((char*)(from), (char*)(to)) - -#elif defined __ANDROID__ - -/* Android lacks __clear_cache; instead, cacheflush should be used. */ - -#define SLJIT_CACHE_FLUSH(from, to) \ - cacheflush((long)(from), (long)(to), 0) - #elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ @@ -370,14 +380,26 @@ extern "C" { sparc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 +#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) + +#define SLJIT_CACHE_FLUSH(from, to) \ + __builtin___clear_cache((char*)(from), (char*)(to)) + +#elif defined __ANDROID__ + +/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */ +#include +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + #elif defined _WIN32 #define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) #else -/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */ #define SLJIT_CACHE_FLUSH(from, to) \ __clear_cache((char*)(from), (char*)(to)) @@ -781,8 +803,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); /* CPU status flags management. */ /********************************/ -#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ - || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c index 4b4affb..313a061 100644 --- a/src/sljit/sljitLir.c +++ b/src/sljit/sljitLir.c @@ -130,7 +130,8 @@ #define FAST_IS_REG(reg) ((reg) <= REG_MASK) /* Mask for argument types. */ -#define SLJIT_ARG_MASK ((1 << SLJIT_ARG_SHIFT) - 1) +#define SLJIT_ARG_MASK 0x7 +#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) /* Jump flags. */ #define JUMP_LABEL 0x1 @@ -541,7 +542,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la } #define SLJIT_CURRENT_FLAGS_ALL \ - (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) + (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { @@ -747,11 +748,11 @@ static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) -static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 fscratches, sljit_s32 word_arg_limit) +static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches) { - sljit_s32 word_arg_count, float_arg_count, curr_type; + sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type; - curr_type = (arg_types & SLJIT_ARG_MASK); + curr_type = (arg_types & SLJIT_ARG_FULL_MASK); if (curr_type >= SLJIT_ARG_TYPE_F64) { if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0) @@ -764,21 +765,39 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; + scratch_arg_end = 0; + saved_arg_count = 0; float_arg_count = 0; - while (arg_types != 0 && word_arg_count + float_arg_count < 4) { - curr_type = (arg_types & SLJIT_ARG_MASK); - if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) + while (arg_types != 0) { + if (word_arg_count + float_arg_count >= 4) return 0; - if (curr_type < SLJIT_ARG_TYPE_F64) + curr_type = (arg_types & SLJIT_ARG_MASK); + + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (saveds == -1 || curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_P) + return 0; + word_arg_count++; - else - float_arg_count++; + scratch_arg_end = word_arg_count; + } else { + if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) + return 0; + + if (curr_type < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + saved_arg_count++; + } else + float_arg_count++; + } arg_types >>= SLJIT_ARG_SHIFT; } - return (arg_types == 0 && word_arg_count <= word_arg_limit && float_arg_count <= fscratches); + if (saveds == -1) + return (word_arg_count <= scratches && float_arg_count <= fscratches); + + return (saved_arg_count <= saveds && scratch_arg_end <= scratches && float_arg_count <= fscratches); } #define FUNCTION_CHECK_IS_REG(r) \ @@ -976,7 +995,7 @@ static const char* op0_names[] = { static const char* op1_names[] = { "", ".u8", ".s8", ".u16", ".s16", ".u32", ".s32", "32", - ".p", "not", "neg", "clz", + ".p", "not", "clz", }; static const char* op2_names[] = { @@ -1061,7 +1080,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -1069,21 +1088,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, fscratches, saveds)); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " enter ret[%s", - call_arg_names[arg_types & SLJIT_ARG_MASK]); + fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); @@ -1091,7 +1110,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil } fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - (options & SLJIT_F64_ALIGNMENT) ? " align:f64," : "", + (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1105,7 +1124,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -1113,21 +1132,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); - CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, fscratches, saveds)); + CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " set_context ret[%s", - call_arg_names[arg_types & SLJIT_ARG_MASK]); + fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) { fprintf(compiler->verbose, "], args["); do { - fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); + fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK], + (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : ""); arg_types >>= SLJIT_ARG_SHIFT; if (arg_types) fprintf(compiler->verbose, ","); @@ -1135,7 +1154,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi } fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - (options & SLJIT_F64_ALIGNMENT) ? " align:f64," : "", + (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1251,10 +1270,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler /* Only SLJIT_32 and SLJIT_SET_Z are allowed. */ CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); break; - case SLJIT_NEG: - CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) - || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); - break; case SLJIT_MOV: case SLJIT_MOV_U32: case SLJIT_MOV_P: @@ -1644,14 +1659,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32)); if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); - else + else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY); + compiler->last_flags = 0; + } else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); } @@ -1670,7 +1687,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); - CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, compiler->fscratches, compiler->scratches)); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); if (type & SLJIT_CALL_RETURN) { CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); @@ -1777,6 +1794,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); FUNCTION_CHECK_SRC(src, srcw); if (type & SLJIT_CALL_RETURN) { @@ -1814,7 +1832,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); - CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); @@ -1823,6 +1840,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); FUNCTION_CHECK_DST(dst, dstw); diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index 2c55eea..1162658 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -256,16 +256,24 @@ extern "C" { /* The following argument type definitions are used by sljit_emit_enter, sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. - As for sljit_emit_enter, the first integer argument is available in - SLJIT_R0, the second one in SLJIT_R1, and so on. Similarly the first - floating point argument is available in SLJIT_FR0, the second one in - SLJIT_FR1, and so on. - As for sljit_emit_call and sljit_emit_icall, the first integer argument must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. Similarly the first floating point argument must be placed into SLJIT_FR0, the second one into SLJIT_FR1, and so on. + As for sljit_emit_enter, the integer arguments can be stored in scratch + or saved registers. The first integer argument without _R postfix is + stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer + arguments with _R postfix are placed into scratch registers. The index + of the scratch register is the count of the previous integer arguments + starting from SLJIT_R0. The floating point arguments are always placed + into SLJIT_FR0, SLJIT_FR1, and so on. + + Note: if a function is called by sljit_emit_call/sljit_emit_icall and + an argument is stored in a scratch register by sljit_emit_enter, + that argument uses the same scratch register index for both + integer and floating point arguments. + Example function definition: sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); @@ -276,27 +284,51 @@ extern "C" { | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) Short form of argument type definition: - SLJIT_ARGS4(F32, P, F64, 32, F32) + SLJIT_ARGS4(32, P, F64, 32, F32) Argument passing: arg_a must be placed in SLJIT_R0 arg_c must be placed in SLJIT_R1 arg_b must be placed in SLJIT_FR0 arg_d must be placed in SLJIT_FR1 - */ + + Examples for argument processing by sljit_emit_enter: + SLJIT_ARGS4(VOID, P, 32_R, F32, W) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1 + + SLJIT_ARGS4(VOID, W, W_R, W, W_R) + Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3 + + SLJIT_ARGS4(VOID, F64, W, F32, W_R) + Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1 + + Note: it is recommended to pass the scratch arguments first + followed by the saved arguments: + + SLJIT_ARGS4(VOID, W_R, W_R, W, W) + Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1 +*/ + +/* The following flag is only allowed for the integer arguments of + sljit_emit_enter. When the flag is set, the integer argument is + stored in a scratch register instead of a saved register. */ +#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8 /* Void result, can only be used by SLJIT_ARG_RETURN. */ -#define SLJIT_ARG_TYPE_VOID 0 +#define SLJIT_ARG_TYPE_VOID 0 /* Machine word sized integer argument or result. */ -#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W 1 +#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG) /* 32 bit integer argument or result. */ -#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32 2 +#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG) /* Pointer sized integer argument or result. */ -#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P 3 +#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG) /* 64 bit floating point argument or result. */ -#define SLJIT_ARG_TYPE_F64 4 +#define SLJIT_ARG_TYPE_F64 4 /* 32 bit floating point argument or result. */ -#define SLJIT_ARG_TYPE_F32 5 +#define SLJIT_ARG_TYPE_F32 5 #define SLJIT_ARG_SHIFT 4 #define SLJIT_ARG_RETURN(type) (type) @@ -415,8 +447,7 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args_size; sljit_s32 locals_offset; - sljit_s32 saveds_offset; - sljit_s32 stack_tmp_size; + sljit_s32 scratches_offset; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -652,9 +683,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) overwrites the previous context. */ -/* The absolute address returned by sljit_get_local_base with -offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ -#define SLJIT_F64_ALIGNMENT 0x00000001 +/* The compiled function uses cdecl calling + * convention instead of SLJIT_FUNC. */ +#define SLJIT_ENTER_CDECL 0x00000001 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 @@ -967,14 +998,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile Note: immediate source argument is not supported */ #define SLJIT_NOT (SLJIT_OP1_BASE + 9) #define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32) -/* Flags: Z | OVERFLOW - Note: immediate source argument is not supported */ -#define SLJIT_NEG (SLJIT_OP1_BASE + 10) -#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_32) /* Count leading zeroes Flags: - (may destroy flags) Note: immediate source argument is not supported */ -#define SLJIT_CLZ (SLJIT_OP1_BASE + 11) +#define SLJIT_CLZ (SLJIT_OP1_BASE + 10) #define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1175,8 +1202,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 -/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ -#define SLJIT_SET_CARRY SLJIT_SET(12) +/* Unlike other flags, sljit_emit_jump may destroy this flag. */ +#define SLJIT_CARRY 12 +#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) +#define SLJIT_NOT_CARRY 13 /* Floating point comparison types. */ #define SLJIT_EQUAL_F64 14 @@ -1538,12 +1567,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Flags were set by a 32 bit operation. */ #define SLJIT_CURRENT_FLAGS_32 SLJIT_32 -/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ -#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 +/* Flags were set by an ADD or ADDC operations. */ +#define SLJIT_CURRENT_FLAGS_ADD 0x01 +/* Flags were set by a SUB, SUBC, or NEG operation. */ +#define SLJIT_CURRENT_FLAGS_SUB 0x02 /* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. - Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ -#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 + Must be combined with SLJIT_CURRENT_FLAGS_SUB. */ +#define SLJIT_CURRENT_FLAGS_COMPARE 0x04 /* Define the currently available CPU status flags. It is usually used after an sljit_emit_label or sljit_emit_op_custom operations to define which CPU diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c index 4fdbbf0..7b87f59 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/src/sljit/sljitNativeARM_32.c @@ -982,6 +982,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define ALLOW_IMM 0x10 #define ALLOW_INV_IMM 0x20 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ALLOW_NEG_IMM 0x40 /* s/l - store/load (1 bit) u/s - signed/unsigned (1 bit) @@ -1048,7 +1049,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_uw imm, offset; - sljit_s32 i, tmp, size, word_arg_count; + sljit_s32 i, tmp, size, word_arg_count, saved_arg_count; #ifdef __SOFTFP__ sljit_u32 float_arg_count; #else @@ -1104,6 +1105,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; + saved_arg_count = 0; #ifdef __SOFTFP__ SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); @@ -1122,7 +1124,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; - offset += sizeof(sljit_f64); + offset += sizeof(sljit_f64) - sizeof(sljit_sw); break; case SLJIT_ARG_TYPE_F32: if (offset < 4 * sizeof(sljit_sw)) @@ -1131,18 +1133,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; - offset += sizeof(sljit_f32); break; default: + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + if (offset < 4 * sizeof(sljit_sw)) - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - word_arg_count) | (offset >> 2))); + FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); else FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 - | RN(SLJIT_SP) | RD(SLJIT_S0 - word_arg_count) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); - word_arg_count++; - offset += sizeof(sljit_sw); + | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); break; } + + offset += sizeof(sljit_sw); arg_types >>= SLJIT_ARG_SHIFT; } @@ -1173,7 +1184,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset++; break; default: - FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - word_arg_count) | RM(SLJIT_R0 + word_arg_count))); + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + word_arg_count++; break; } @@ -1405,7 +1420,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1417,7 +1431,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); @@ -1765,15 +1778,35 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* We prefers register and simple consts. */ sljit_s32 dst_reg; sljit_s32 src1_reg; - sljit_s32 src2_reg; + sljit_s32 src2_reg = 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 neg_op = 0; if (dst == TMP_REG2) flags |= UNUSED_RETURN; SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); - src2_reg = 0; + if (inp_flags & ALLOW_NEG_IMM) { + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUB; + break; + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + neg_op = SLJIT_SUBC; + break; + case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADD; + break; + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + neg_op = SLJIT_ADDC; + break; + } + } do { if (!(inp_flags & ALLOW_IMM)) @@ -1790,17 +1823,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { + if (neg_op != 0) { src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); if (src2_reg) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB) { - src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); - if (src2_reg) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } @@ -1823,13 +1849,13 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 break; } } - if (GET_OPCODE(op) == SLJIT_ADD) { + if (neg_op >= SLJIT_SUB) { + /* Note: additive operation (commutative). */ src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); if (src2_reg) { - /* Note: add is commutative operation. */ src1 = src2; src1w = src2w; - op = SLJIT_SUB | GET_ALL_FLAGS(op); + op = neg_op | GET_ALL_FLAGS(op); break; } } @@ -2007,13 +2033,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); } @@ -2037,6 +2056,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile case SLJIT_ADDC: case SLJIT_SUB: case SLJIT_SUBC: + return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: case SLJIT_XOR: return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); @@ -2126,6 +2147,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -2385,10 +2407,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x10000000; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x20000000; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x30000000; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x30000000; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x20000000; @@ -2414,7 +2446,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd0000000; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x10000000; /* fallthrough */ @@ -2422,7 +2454,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x60000000; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x00000000; /* fallthrough */ diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c index de29e8f..96453b4 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/src/sljit/sljitNativeARM_64.c @@ -630,7 +630,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s switch (op) { case SLJIT_MUL: - case SLJIT_NEG: case SLJIT_CLZ: case SLJIT_ADDC: case SLJIT_SUBC: @@ -645,12 +644,15 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); goto set_flags; case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = -imm; /* Fall through. */ case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + if (op != SLJIT_SUB) + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (imm == 0) { CHECK_FLAGS(1 << 29); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); @@ -791,27 +793,23 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s SLJIT_ASSERT(arg1 == TMP_REG1); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); break; /* Set flags. */ - case SLJIT_NEG: - SLJIT_ASSERT(arg1 == TMP_REG1); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - if (flags & SET_FLAGS) - inv_bits |= 1 << 29; - return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; CHECK_FLAGS(1 << 29); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_MUL: @@ -1012,9 +1010,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); #endif /* _WIN32 */ + tmp = 0; while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - word_arg_count) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + tmp++; + } word_arg_count++; } arg_types >>= SLJIT_ARG_SHIFT; @@ -1452,6 +1454,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_u32 size) { + SLJIT_UNUSED_ARG(size); CHECK_ERROR(); CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); @@ -1703,10 +1706,20 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x0; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x2; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x3; @@ -1732,7 +1745,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xc; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; /* fallthrough */ @@ -1740,7 +1753,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x7; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; /* fallthrough */ @@ -1798,6 +1811,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -1880,6 +1894,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c index 90db258..ed21ea7 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -620,7 +620,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ break; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) @@ -648,13 +648,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; imm = get_imm(imm); if (imm != INVALID_IMM) return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; case SLJIT_SUB: - /* SUB operation can be replaced by ADD because of the negative carry flag. */ - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) { if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); @@ -672,6 +672,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s nimm = get_imm(NEGATE(imm)); if (nimm != INVALID_IMM) return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); + break; } nimm = NEGATE(imm); if (IS_2_LO_REGS(reg, dst)) { @@ -700,6 +701,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) break; imm = get_imm(imm); @@ -814,18 +816,19 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); return SLJIT_SUCCESS; case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS)) return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & UNUSED_RETURN) { if (IS_2_LO_REGS(arg1, arg2)) return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); @@ -835,6 +838,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); @@ -1078,7 +1082,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 size, i, tmp, word_arg_count; + sljit_s32 size, i, tmp, word_arg_count, saved_arg_count; sljit_uw offset; sljit_uw imm = 0; #ifdef __SOFTFP__ @@ -1129,6 +1133,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; + saved_arg_count = 0; #ifdef __SOFTFP__ SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); @@ -1147,7 +1152,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; - offset += sizeof(sljit_f64); + offset += sizeof(sljit_f64) - sizeof(sljit_sw); break; case SLJIT_ARG_TYPE_F32: if (offset < 4 * sizeof(sljit_sw)) @@ -1156,21 +1161,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP) | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); float_arg_count++; - offset += sizeof(sljit_f32); break; default: - SLJIT_ASSERT(reg_map[SLJIT_S0 - word_arg_count] <= 7); + word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2)) + tmp = word_arg_count; + else + break; + + SLJIT_ASSERT(reg_map[tmp] <= 7); if (offset < 4 * sizeof(sljit_sw)) - FAIL_IF(push_inst16(compiler, MOV | RD3(SLJIT_S0 - word_arg_count) | (offset << 1))); + FAIL_IF(push_inst16(compiler, MOV | RD3(tmp) | (offset << 1))); else - FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(SLJIT_S0 - word_arg_count) + FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); - - word_arg_count++; - offset += sizeof(sljit_sw); break; } + + offset += sizeof(sljit_sw); arg_types >>= SLJIT_ARG_SHIFT; } @@ -1201,7 +1214,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset++; break; default: - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - word_arg_count, SLJIT_R0 + word_arg_count))); + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } + word_arg_count++; break; } @@ -1602,14 +1619,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); } - if (op == SLJIT_NEG) { -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); - } - flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; if (src & SLJIT_MEM) { @@ -1982,10 +1991,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_NOT_EQUAL_F64: return 0x1; + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x2; + /* fallthrough */ + case SLJIT_LESS: case SLJIT_LESS_F64: return 0x3; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) + return 0x3; + /* fallthrough */ + case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL_F64: return 0x2; @@ -2011,7 +2030,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0xd; case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x1; /* fallthrough */ @@ -2019,7 +2038,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x6; case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return 0x0; /* fallthrough */ diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c index c2a4438..be5cb22 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/src/sljit/sljitNativeMIPS_common.c @@ -770,7 +770,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_ins base; sljit_s32 i, tmp, offset; - sljit_s32 arg_count, word_arg_count, float_arg_count; + sljit_s32 arg_count, word_arg_count, saved_arg_count, float_arg_count; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -863,6 +863,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi arg_types >>= SLJIT_ARG_SHIFT; arg_count = 0; word_arg_count = 0; + saved_arg_count = 0; float_arg_count = 0; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -901,13 +902,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); break; default: - if (arg_count < 4) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(SLJIT_S0 - word_arg_count), - DR(SLJIT_S0 - word_arg_count))); - else - FAIL_IF(push_inst(compiler, LW | base | T(SLJIT_S0 - word_arg_count) | IMM(local_size + (arg_count << 2)), - DR(SLJIT_S0 - word_arg_count))); word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count + 1 || arg_count == 0) + tmp = word_arg_count; + else + break; + + if (arg_count < 4) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp))); + else + FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp))); break; } arg_count++; @@ -934,8 +942,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); break; default: - FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(SLJIT_S0 - word_arg_count), DR(SLJIT_S0 - word_arg_count))); word_arg_count++; + + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (word_arg_count != arg_count || word_arg_count <= 1) + tmp = word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp))); break; } arg_types >>= SLJIT_ARG_SHIFT; @@ -1566,10 +1583,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; - return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); - case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); } @@ -1604,12 +1617,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: case SLJIT_SUBC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: @@ -1723,7 +1736,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags (sljit_u32)0 #else - sljit_u32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; #endif if (src & SLJIT_MEM) { @@ -1751,7 +1764,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) # define flags (sljit_u32)0 #else - sljit_u32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; #endif sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -2053,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_LESS: case SLJIT_SIG_GREATER: case SLJIT_OVERFLOW: + case SLJIT_CARRY: BR_Z(OTHER_FLAG); break; case SLJIT_GREATER_EQUAL: @@ -2060,6 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: + case SLJIT_NOT_CARRY: BR_NZ(OTHER_FLAG); break; case SLJIT_NOT_EQUAL_F64: @@ -2316,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; case SLJIT_OVERFLOW: case SLJIT_NOT_OVERFLOW: - if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { src_ar = OTHER_FLAG; break; } diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c index 216d8ec..95fe6bb 100644 --- a/src/sljit/sljitNativePPC_32.c +++ b/src/sljit/sljitNativePPC_32.c @@ -86,11 +86,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); return push_inst(compiler, CNTLZW | S(src2) | A(dst)); @@ -158,7 +153,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); } if (flags & ALT_FORM4) { @@ -167,11 +164,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c index b267060..d104f6d 100644 --- a/src/sljit/sljitNativePPC_64.c +++ b/src/sljit/sljitNativePPC_64.c @@ -199,19 +199,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl UN_EXTS(); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); - case SLJIT_NEG: - SLJIT_ASSERT(src1 == TMP_REG1); - - if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); - return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); - } - - UN_EXTS(); - /* Setting XER SO is not enough, CR SO is also needed. */ - return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); - case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); if (flags & ALT_FORM1) @@ -299,13 +286,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (flags & ALT_FORM3) { if (flags & ALT_SIGN_EXT) { - FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); - src1 = TMP_REG1; - FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); - src2 = TMP_REG2; + if (src1 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + } + if (src2 != TMP_ZERO) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } } + /* Setting XER SO is not enough, CR SO is also needed. */ - FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (src1 != TMP_ZERO) + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + else + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2))); + if (flags & ALT_SIGN_EXT) return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); return SLJIT_SUCCESS; @@ -317,12 +313,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (!(flags & ALT_SET_FLAGS)) + if (!(flags & ALT_SET_FLAGS)) { + SLJIT_ASSERT(src1 != TMP_ZERO); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + BIN_EXTS(); if (flags & ALT_FORM5) return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); + + if (src1 != TMP_ZERO) + return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2)); case SLJIT_SUBC: BIN_EXTS(); diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c index 1a21cfe..8bfdc69 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/src/sljit/sljitNativePPC_common.c @@ -721,6 +721,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_s32 i, tmp, base, offset; sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) sljit_s32 arg_count = 0; #endif @@ -791,9 +792,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(SLJIT_S0 - word_arg_count) | B(SLJIT_R0 + arg_count))); + do { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = SLJIT_S0 - saved_arg_count; + saved_arg_count++; + } else if (arg_count != word_arg_count) + tmp = SLJIT_R0 + word_arg_count; + else + break; + + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count))); + } while (0); #else - FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - word_arg_count) | B(SLJIT_R0 + word_arg_count))); + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count))); + saved_arg_count++; + } #endif word_arg_count++; } @@ -1153,8 +1167,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= REG1_SOURCE; } else if (src1 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; + src1_r = TMP_ZERO; + if (src1w != 0) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); @@ -1170,8 +1187,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 dst_r = src2_r; } else if (src2 & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; + src2_r = TMP_ZERO; + if (src2w != 0) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } } else { FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); @@ -1277,8 +1297,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src, srcw); op = GET_OPCODE(op); - if ((src & SLJIT_IMM) && srcw == 0) - src = TMP_ZERO; if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); @@ -1343,9 +1361,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_NEG: - return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw); - case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); @@ -1424,11 +1439,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - if ((src1 & SLJIT_IMM) && src1w == 0) - src1 = TMP_ZERO; - if ((src2 & SLJIT_IMM) && src2w == 0) - src2 = TMP_ZERO; - #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_32) { /* Most operations expect sign extended arguments. */ @@ -1446,6 +1456,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + if (TEST_ADD_FORM1(op)) return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); @@ -1503,9 +1515,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { if (dst == TMP_REG2) { if (TEST_UL_IMM(src2, src2w)) { @@ -1567,6 +1582,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: @@ -2025,12 +2041,22 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi return label; } -static sljit_ins get_bo_bi_flags(sljit_s32 type) +static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (4 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_EQUAL: return (12 << 21) | (2 << 16); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (12 << 21) | (2 << 16); + /* fallthrough */ + case SLJIT_NOT_EQUAL: return (4 << 21) | (2 << 16); @@ -2094,7 +2120,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - bo_bi_flags = get_bo_bi_flags(type & 0xff); + bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff); if (!bo_bi_flags) return NULL; @@ -2103,6 +2129,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); type &= 0xff; + if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY) + PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO))); + /* In PPC, we don't need to touch the arguments. */ if (type < SLJIT_JUMP) jump->flags |= IS_COND; @@ -2227,7 +2256,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 type) { sljit_s32 reg, invert; - sljit_u32 cr_bit; + sljit_u32 bit, from_xer; sljit_s32 saved_op = op; sljit_sw saved_dstw = dstw; #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -2247,7 +2276,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); invert = 0; - cr_bit = 0; + bit = 0; + from_xer = 0; switch (type & 0xff) { case SLJIT_LESS: @@ -2261,66 +2291,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co case SLJIT_GREATER: case SLJIT_SIG_GREATER: - cr_bit = 1; + bit = 1; break; case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: - cr_bit = 1; + bit = 1; invert = 1; break; case SLJIT_EQUAL: - cr_bit = 2; + bit = 2; break; case SLJIT_NOT_EQUAL: - cr_bit = 2; + bit = 2; invert = 1; break; case SLJIT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; break; case SLJIT_NOT_OVERFLOW: - cr_bit = 3; + from_xer = 1; + bit = 1; invert = 1; break; + case SLJIT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0; + break; + + case SLJIT_NOT_CARRY: + from_xer = 1; + bit = 2; + invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; + break; + case SLJIT_LESS_F64: - cr_bit = 4 + 0; + bit = 4 + 0; break; case SLJIT_GREATER_EQUAL_F64: - cr_bit = 4 + 0; + bit = 4 + 0; invert = 1; break; case SLJIT_GREATER_F64: - cr_bit = 4 + 1; + bit = 4 + 1; break; case SLJIT_LESS_EQUAL_F64: - cr_bit = 4 + 1; + bit = 4 + 1; invert = 1; break; case SLJIT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; break; case SLJIT_NOT_EQUAL_F64: - cr_bit = 4 + 2; + bit = 4 + 2; invert = 1; break; case SLJIT_UNORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; break; case SLJIT_ORDERED_F64: - cr_bit = 4 + 3; + bit = 4 + 3; invert = 1; break; @@ -2329,8 +2373,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; } - FAIL_IF(push_inst(compiler, MFCR | D(reg))); - FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + cr_bit) << 11) | (31 << 6) | (31 << 1))); + FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg))); + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1))); if (invert) FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c index ec9d4ed..8eef910 100644 --- a/src/sljit/sljitNativeS390X.c +++ b/src/sljit/sljitNativeS390X.c @@ -198,7 +198,8 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins) } #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ - (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) + (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \ + && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE)) /* Map the given type to a 4-bit condition code mask. */ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { @@ -256,10 +257,20 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t case SLJIT_LESS_F64: return cc1; + case SLJIT_NOT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc2 | cc3); + /* fallthrough */ + case SLJIT_SIG_LESS_EQUAL: case SLJIT_LESS_EQUAL_F64: return (cc0 | cc1); + case SLJIT_CARRY: + if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) + return (cc0 | cc1); + /* fallthrough */ + case SLJIT_SIG_GREATER: /* Overflow is considered greater, see SLJIT_SUB. */ return cc2 | cc3; @@ -1037,8 +1048,8 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr src_r = tmp1; sljit_s32 needs_move = 1; - if (IS_GPR_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1052,7 +1063,7 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); if (FAST_IS_REG(src2)) - src_r = gpr(src2 & REG_MASK); + src_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); @@ -1065,6 +1076,21 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins, return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); } +static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins, + sljit_s32 dst, + sljit_s32 src1, sljit_sw src1w) +{ + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; + sljit_gpr src_r = tmp1; + + if (FAST_IS_REG(src1)) + src_r = gpr(src1); + else + FAIL_IF(emit_move(compiler, tmp1, src1, src1w)); + + return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)); +} + static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_s32 src1, sljit_sw src1w, @@ -1075,12 +1101,12 @@ static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr src2_r = tmp1; if (FAST_IS_REG(src1)) - src1_r = gpr(src1 & REG_MASK); + src1_r = gpr(src1); else FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); if (FAST_IS_REG(src2)) - src2_r = gpr(src2 & REG_MASK); + src2_r = gpr(src2); else FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); @@ -1101,8 +1127,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins, sljit_gpr dst_r = tmp0; sljit_s32 needs_move = 1; - if (IS_GPR_REG(dst)) { - dst_r = gpr(dst & REG_MASK); + if (FAST_IS_REG(dst)) { + dst_r = gpr(dst); if (dst == src1) needs_move = 0; @@ -1121,7 +1147,7 @@ static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 src1, sljit_sw src1w, sljit_sw src2w) { - sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; + sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0; sljit_gpr src_r = tmp0; if (!FAST_IS_REG(src1)) @@ -1149,7 +1175,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins, SLJIT_ASSERT(src2 & SLJIT_MEM); - if (IS_GPR_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = gpr(dst); if (dst == src1) @@ -1610,7 +1636,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 arg_count = 0; + sljit_s32 word_arg_count = 0; sljit_s32 offset, i, tmp; CHECK_ERROR(); @@ -1659,10 +1685,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); arg_types >>= SLJIT_ARG_SHIFT; + tmp = 0; while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - arg_count), gpr(SLJIT_R0 + arg_count)))); - arg_count++; + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count)))); + tmp++; + } + word_arg_count++; } arg_types >>= SLJIT_ARG_SHIFT; @@ -2088,14 +2118,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); } break; - case SLJIT_NEG: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r))); - break; - case SLJIT_NEG32: - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - FAIL_IF(push_inst(compiler, lcr(dst_r, src_r))); - break; case SLJIT_CLZ: if (have_eimm()) { FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ @@ -2149,20 +2171,6 @@ static SLJIT_INLINE int is_shift(sljit_s32 op) { return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0; } -static SLJIT_INLINE int sets_signed_flag(sljit_s32 op) -{ - switch (GET_FLAG_TYPE(op)) { - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - case SLJIT_SIG_LESS: - case SLJIT_SIG_LESS_EQUAL: - case SLJIT_SIG_GREATER: - case SLJIT_SIG_GREATER_EQUAL: - return 1; - } - return 0; -} - static const struct ins_forms add_forms = { 0x1a00, /* ar */ 0xb9080000, /* agr */ @@ -2267,13 +2275,14 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - int sets_signed = sets_signed_flag(op); + sljit_s32 flag_type = GET_FLAG_TYPE(op); + int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW); int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); const struct ins_forms *forms; sljit_ins ins; - if (dst == (sljit_s32)tmp0 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; + if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) { + int compare_signed = flag_type >= SLJIT_SIG_LESS; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; @@ -2314,6 +2323,12 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op, return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); } + if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) { + ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */; + FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w)); + goto done; + } + if (src2 & SLJIT_IMM) { sljit_sw neg_src2w = -src2w; @@ -2663,9 +2678,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->mode = op & SLJIT_32; compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); - if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC) - compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB; - if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { src1 ^= src2; src2 ^= src1; @@ -2678,15 +2690,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile switch (GET_OPCODE(op)) { case SLJIT_ADD: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD; FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return SLJIT_SUCCESS; case SLJIT_SUB: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: + compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB; FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); if (dst & SLJIT_MEM) return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); diff --git a/src/sljit/sljitNativeSPARC_32.c b/src/sljit/sljitNativeSPARC_32.c index 932b1fd..218992b 100644 --- a/src/sljit/sljitNativeSPARC_32.c +++ b/src/sljit/sljitNativeSPARC_32.c @@ -88,17 +88,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); case SLJIT_MUL: diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c index 62e2406..c8d19e1 100644 --- a/src/sljit/sljitNativeSPARC_common.c +++ b/src/sljit/sljitNativeSPARC_common.c @@ -512,9 +512,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 reg_index, types; + sljit_s32 reg_index, types, tmp; sljit_u32 float_offset, args_offset; - sljit_s32 word_arg_index, float_arg_index; + sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -564,7 +564,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi args_offset = (16 + 1 + 6) * sizeof(sljit_sw); float_offset = 16 * sizeof(sljit_sw); reg_index = 24; - word_arg_index = 24; + saved_arg_index = 24; + scratch_arg_index = 8 - 1; float_arg_index = 1; while (arg_types) { @@ -593,14 +594,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi float_offset += sizeof(sljit_f64); break; default: - if (reg_index != word_arg_index) { - if (reg_index < 24 + 6) - FAIL_IF(push_inst(compiler, OR | DA(word_arg_index) | S1(0) | S2A(reg_index), word_arg_index)); - else - FAIL_IF(push_inst(compiler, LDUW | DA(word_arg_index) | S1A(30) | IMM(args_offset), word_arg_index)); - } + scratch_arg_index++; - word_arg_index++; + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + tmp = saved_arg_index++; + if (tmp == reg_index) + break; + } else + tmp = scratch_arg_index; + + if (reg_index < 24 + 6) + FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp)); + else + FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp)); break; } @@ -1018,9 +1024,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile case SLJIT_NOT: case SLJIT_CLZ: return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_NEG: - return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); } return SLJIT_SUCCESS; @@ -1395,10 +1398,12 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) case SLJIT_LESS: case SLJIT_GREATER_F64: /* Unordered. */ + case SLJIT_CARRY: return DA(0x5); case SLJIT_GREATER_EQUAL: case SLJIT_LESS_EQUAL_F64: + case SLJIT_NOT_CARRY: return DA(0xd); case SLJIT_GREATER: @@ -1422,7 +1427,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return DA(0x2); case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x9); /* fallthrough */ @@ -1430,7 +1435,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return DA(0x7); case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) + if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) return DA(0x1); /* fallthrough */ diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c index 53b71d4..b9a7b39 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/src/sljit/sljitNativeX86_32.c @@ -243,13 +243,23 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ return code_ptr; } +#define ENTER_R2_USED 0x00001 +#define ENTER_R2_TO_S 0x00002 +#define ENTER_R2_TO_R0 0x00004 +#define ENTER_R1_TO_S 0x00008 +#define ENTER_TMP_TO_R4 0x00010 +#define ENTER_TMP_TO_S 0x00020 + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 word_arg_count, float_arg_count, args_size, types; - sljit_uw size; + sljit_s32 word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 size, locals_offset, args_size, types, status; sljit_u8 *inst; +#ifdef _WIN32 + sljit_s32 r2_offset = -1; +#endif CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -263,8 +273,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi arg_types >>= SLJIT_ARG_SHIFT; types = arg_types; word_arg_count = 0; + saved_arg_count = 0; float_arg_count = 0; args_size = SSIZE_OF(sw); + status = 0; while (types) { switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: @@ -279,12 +291,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi break; default: word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + saved_arg_count++; + if (saved_arg_count == 4) + status |= ENTER_TMP_TO_S; + } else { + if (word_arg_count == 4) + status |= ENTER_TMP_TO_R4; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count > 2) - args_size += SSIZE_OF(sw); -#else - args_size += SSIZE_OF(sw); + if (word_arg_count == 3) + status |= ENTER_R2_USED; #endif + } + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL)) + break; +#endif + + args_size += SSIZE_OF(sw); break; } types >>= SLJIT_ARG_SHIFT; @@ -294,30 +320,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi compiler->args_size = args_size; /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * SSIZE_OF(sw); + locals_offset = 2 * SSIZE_OF(sw); -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * SSIZE_OF(sw); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); #endif - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + compiler->scratches_offset = locals_offset; - compiler->locals_offset = compiler->saveds_offset; + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); if (saveds > 3) - compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw); + locals_offset += (saveds - 3) * SSIZE_OF(sw); - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1); + compiler->locals_offset = locals_offset; - size = (sljit_uw)(1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)); - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); FAIL_IF(!inst); - INC_SIZE(size); + INC_SIZE((sljit_uw)size); PUSH_REG(reg_map[TMP_REG1]); if (saveds > 2 || scratches > 9) PUSH_REG(reg_map[SLJIT_S2]); @@ -326,11 +353,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (saveds > 0 || scratches > 11) PUSH_REG(reg_map[SLJIT_S0]); - if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + (sljit_s32)(size * sizeof(sljit_sw))); + size *= SSIZE_OF(sw); + + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); + + size += SSIZE_OF(sw); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size += args_size; +#endif + + local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size; + compiler->local_size = local_size; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + size -= args_size; +#endif word_arg_count = 0; - args_size = (sljit_s32)((size + 1) * sizeof(sljit_sw)); + saved_arg_count = 0; + args_size = size; while (arg_types) { switch (arg_types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: @@ -341,12 +386,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi break; default: word_arg_count++; - if (word_arg_count <= 3) { + #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count <= 2) - break; + if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) { + if (word_arg_count == 1) { + if (status & ENTER_R2_USED) { + EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0); + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R2_TO_S; + saved_arg_count++; + } else + status |= ENTER_R2_TO_R0; + } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + status |= ENTER_R1_TO_S; + saved_arg_count++; + } + break; + } #endif - EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4)); + + if (word_arg_count <= 3) { +#ifdef _WIN32 + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else +#endif + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + } + } else { + SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S)); + + if (saved_arg_count <= 3) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + saved_arg_count++; } args_size += SSIZE_OF(sw); break; @@ -355,95 +429,71 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count > 0) - EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); - if (word_arg_count > 1) - EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0); + if (!(options & SLJIT_ENTER_CDECL)) { + if (status & ENTER_R2_TO_R0) + EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0); + + saved_arg_count = 0; + if (status & ENTER_R2_TO_S) { + EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); + saved_arg_count++; + } + + if (status & ENTER_R1_TO_S) + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0); + } #endif SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); -#if defined(__APPLE__) - /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); - local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1)); - else - local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1)); -#endif - - compiler->local_size = local_size; - #ifdef _WIN32 - if (local_size > 0) { + SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096); + + if (local_size > 4096) { if (local_size <= 4 * 4096) { - if (local_size > 4096) - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + if (local_size > 2 * 4096) - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); if (local_size > 3 * 4096) - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); } else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1)); + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); - inst[0] = JNE_i8; - inst[1] = (sljit_s8) -16; + inst[0] = LOOP_i8; + inst[1] = (sljit_u8)-16; + local_size &= 0xfff; } } -#endif + + if (local_size > 0) { + BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); + } + + if (r2_offset != -1) + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset); + +#else /* !_WIN32 */ SLJIT_ASSERT(local_size > 0); -#if !defined(__APPLE__) - if (options & SLJIT_F64_ALIGNMENT) { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); - /* Some space might allocated during sljit_grow_stack() above on WIN32. */ - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + SSIZE_OF(sw))); +#endif /* _WIN32 */ -#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->local_size > 1024) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, SSIZE_OF(sw))); -#endif - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - - INC_SIZE(6); - inst[0] = GROUP_BINARY_81; - inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; - sljit_unaligned_store_sw(inst + 2, ~(SSIZE_OF(f64) - 1)); - - if (word_arg_count == 4) - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0); - - /* The real local size must be used. */ - return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, SLJIT_R0, 0); + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { + size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); } -#endif - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - - if (word_arg_count == 4) - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0); return SLJIT_SUCCESS; } @@ -452,7 +502,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 args_size; + sljit_s32 args_size, locals_offset; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) sljit_s32 word_arg_count = 0; #endif @@ -487,34 +537,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp compiler->args_size = args_size; /* [esp+0] for saving temporaries and function calls. */ - compiler->stack_tmp_size = 2 * SSIZE_OF(sw); + locals_offset = 2 * SSIZE_OF(sw); -#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (scratches > 3) - compiler->stack_tmp_size = 3 * SSIZE_OF(sw); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); +#else + if (scratches >= 3) + locals_offset = 4 * SSIZE_OF(sw); #endif - compiler->saveds_offset = compiler->stack_tmp_size; - if (scratches > 3) - compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); + compiler->scratches_offset = locals_offset; - compiler->locals_offset = compiler->saveds_offset; + if (scratches > 3) + locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw); if (saveds > 3) - compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw); + locals_offset += (saveds - 3) * SSIZE_OF(sw); - if (options & SLJIT_F64_ALIGNMENT) - compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1); + compiler->locals_offset = locals_offset; -#if defined(__APPLE__) saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); - compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; -#else - if (options & SLJIT_F64_ALIGNMENT) - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1)); - else - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1)); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(options & SLJIT_ENTER_CDECL)) + saveds += args_size; #endif + + compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds; return SLJIT_SUCCESS; } @@ -552,22 +602,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->local_size > 0); -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) - EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) - else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#else - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#endif + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); FAIL_IF(emit_stack_frame_release(compiler)); size = 1; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args_size > 0) + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) size = 3; #endif inst = (sljit_u8*)ensure_buf(compiler, 1 + size); @@ -576,7 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler INC_SIZE(size); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args_size > 0) { + if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) { RET_I16(U8(compiler->args_size)); return SLJIT_SUCCESS; } @@ -637,10 +678,9 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, } else if (stack_size > 0) { if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; arg_types >>= SLJIT_ARG_SHIFT; @@ -725,27 +765,23 @@ static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, slji if (word_arg_count_ptr) *word_arg_count_ptr = word_arg_count; - if (stack_size <= compiler->stack_tmp_size) + if (stack_size <= compiler->scratches_offset) return 0; -#if defined(__APPLE__) - return ((stack_size - compiler->stack_tmp_size + 15) & ~15); -#else - return stack_size - compiler->stack_tmp_size; -#endif + return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf); } static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count) { sljit_s32 float_arg_count = 0; + sljit_u8 *inst; if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); if (stack_size > 0) - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); stack_size = 0; word_arg_count = 0; @@ -783,8 +819,7 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 single; if (stack_size > 0) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size)); + BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0); if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) return SLJIT_SUCCESS; @@ -808,7 +843,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, sljit_sw args_size, prev_args_size, saved_regs_size; sljit_sw types, word_arg_count, float_arg_count; sljit_sw stack_size, prev_stack_size, min_size, offset; - sljit_sw base_reg, word_arg4_offset; + sljit_sw word_arg4_offset; sljit_u8 r2_offset = 0; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL; @@ -852,7 +887,11 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, arg_types >>= SLJIT_ARG_SHIFT; } - if (args_size <= compiler->args_size) { + if (args_size <= compiler->args_size +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call) +#endif /* SLJIT_X86_32_FASTCALL */ + && 1) { #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) *extra_space = fast_call ? 0 : args_size; prev_args_size = compiler->args_size; @@ -862,18 +901,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; #endif /* SLJIT_X86_32_FASTCALL */ -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) { - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size); - offset = stack_size; - base_reg = SLJIT_MEM1(TMP_REG1); - } else { -#endif /* !__APPLE__ */ - offset = stack_size + compiler->local_size; - base_reg = SLJIT_MEM1(SLJIT_SP); -#if !defined(__APPLE__) - } -#endif /* !__APPLE__ */ + offset = stack_size + compiler->local_size; if (!(src & SLJIT_IMM) && src != SLJIT_R0) { if (word_arg_count >= 1) { @@ -884,31 +912,33 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, } #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!fast_call) - offset -= SSIZE_OF(sw); + if (!(compiler->options & SLJIT_ENTER_CDECL)) { + if (!fast_call) + offset -= SSIZE_OF(sw); - if (word_arg_count >= 3) { - word_arg4_offset = SSIZE_OF(sw); + if (word_arg_count >= 3) { + word_arg4_offset = SSIZE_OF(sw); - if (word_arg_count + float_arg_count >= 4) { - word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw); - if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) - word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64); + if (word_arg_count + float_arg_count >= 4) { + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw); + if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) + word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64); + } + + /* In cdecl mode, at least one more word value must + * be present on the stack before the return address. */ + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0); } - /* In cdecl mode, at least one more word value must - * be present on the stack before the return address. */ - EMIT_MOV(compiler, base_reg, offset - word_arg4_offset, SLJIT_R2, 0); - } - - if (fast_call) { - if (args_size < prev_args_size) { - EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size - SSIZE_OF(sw)); - EMIT_MOV(compiler, base_reg, offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + if (fast_call) { + if (args_size < prev_args_size) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw)); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); + } + } else if (prev_args_size > 0) { + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); } - } else if (prev_args_size > 0) { - EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size); - EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); } #endif /* SLJIT_X86_32_FASTCALL */ @@ -916,12 +946,12 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: offset -= SSIZE_OF(f64); - FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count)); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); float_arg_count--; break; case SLJIT_ARG_TYPE_F32: offset -= SSIZE_OF(f32); - FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count)); + FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count)); float_arg_count--; break; default: @@ -936,9 +966,9 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, offset -= SSIZE_OF(sw); if (r2_offset != 0) { EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); - EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); } else - EMIT_MOV(compiler, base_reg, offset, SLJIT_R0, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); break; case 2: #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) @@ -946,15 +976,15 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, break; #endif offset -= SSIZE_OF(sw); - EMIT_MOV(compiler, base_reg, offset, SLJIT_R1, 0); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); break; case 3: offset -= SSIZE_OF(sw); break; case 4: offset -= SSIZE_OF(sw); - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); - EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); break; } word_arg_count--; @@ -963,22 +993,12 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, types >>= SLJIT_ARG_SHIFT; } -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) { - EMIT_MOV(compiler, SLJIT_SP, 0, TMP_REG1, 0); - } else { -#endif /* !__APPLE__ */ - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); -#if !defined(__APPLE__) - } -#endif /* !__APPLE__ */ + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); FAIL_IF(emit_stack_frame_release(compiler)); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args_size < prev_args_size) - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, prev_args_size - args_size)); + BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0); #endif return SLJIT_SUCCESS; @@ -994,56 +1014,31 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, if (word_arg_count >= 3) stack_size += SSIZE_OF(sw); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - prev_args_size = compiler->args_size; -#else prev_args_size = 0; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (!(compiler->options & SLJIT_ENTER_CDECL)) + prev_args_size = compiler->args_size; #endif prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; min_size = prev_stack_size + compiler->local_size; - base_reg = SLJIT_MEM1(SLJIT_SP); - word_arg4_offset = compiler->saveds_offset - SSIZE_OF(sw); - -#if !defined(__APPLE__) - if (compiler->options & SLJIT_F64_ALIGNMENT) { - min_size += 2 * SSIZE_OF(sw); - - if (stack_size < min_size) - stack_size = min_size; - - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, stack_size - prev_stack_size)); - - inst = emit_x86_instruction(compiler, 1, SLJIT_SP, 0, TMP_REG1, 0); - FAIL_IF(!inst); - *inst = XCHG_r_rm; + word_arg4_offset = compiler->scratches_offset; + if (stack_size > min_size) { + BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0); if (src == SLJIT_MEM1(SLJIT_SP)) - src = SLJIT_MEM1(TMP_REG1); - base_reg = SLJIT_MEM1(TMP_REG1); - } else { -#endif /* !__APPLE__ */ - if (stack_size > min_size) { - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size - min_size)); - if (src == SLJIT_MEM1(SLJIT_SP)) - srcw += stack_size - min_size; - word_arg4_offset += stack_size - min_size; - } - else - stack_size = min_size; -#if !defined(__APPLE__) + srcw += stack_size - min_size; + word_arg4_offset += stack_size - min_size; } -#endif /* !__APPLE__ */ + else + stack_size = min_size; if (word_arg_count >= 3) { EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); if (word_arg_count >= 4) - EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, word_arg4_offset); + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset); } if (!(src & SLJIT_IMM) && src != SLJIT_R0) { @@ -1184,8 +1179,8 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, if (offset == 0) return SLJIT_SUCCESS; - return emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, offset); + BINARY_IMM32(ADD, offset, SLJIT_SP, 0); + return SLJIT_SUCCESS; } static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) @@ -1193,8 +1188,7 @@ static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 e /* Called when stack consumption cannot be reduced to 0. */ sljit_u8 *inst; - FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, extra_space)); + BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0); inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); @@ -1321,7 +1315,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -1330,7 +1324,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, 0); @@ -1340,7 +1334,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); - compiler->saveds_offset += stack_size; + compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ @@ -1349,7 +1343,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi #endif FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - compiler->saveds_offset -= stack_size; + compiler->scratches_offset -= stack_size; compiler->locals_offset -= stack_size; return post_call_with_args(compiler, arg_types, stack_size); @@ -1412,36 +1406,18 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) { - sljit_sw size, saved_size; - sljit_s32 has_f64_aligment; + sljit_sw size; /* Don't adjust shadow stack if it isn't enabled. */ - if (!cpu_has_shadow_stack ()) + if (!cpu_has_shadow_stack()) return SLJIT_SUCCESS; SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->local_size > 0); -#if !defined(__APPLE__) - has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT; -#else - has_f64_aligment = 0; -#endif - size = compiler->local_size; - saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); - if (has_f64_aligment) { - /* mov TMP_REG1, [esp + local_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size); - /* mov TMP_REG1, [TMP_REG1+ saved_size]. */ - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size); - /* Move return address to [esp]. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0); - size = 0; - } else - size += saved_size; - return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); } diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index 704716a..f37df6e 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -366,6 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_uw size; sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = 0; sljit_s32 saved_regs_size, tmp, i; #ifdef _WIN64 sljit_s32 saved_float_regs_size; @@ -455,7 +456,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi break; } #endif /* _WIN64 */ - EMIT_MOV(compiler, SLJIT_S0 - word_arg_count, 0, tmp, 0); + if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { + if (tmp != SLJIT_R0 + word_arg_count) + EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0); + saved_arg_count++; + } word_arg_count++; } else { #ifdef _WIN64 @@ -483,33 +490,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); } else { - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12); - SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); - - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096)); - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1)); + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096); + BINARY_IMM32(SUB, 4096, SLJIT_SP, 0); + BINARY_IMM32(SUB, 1, TMP_REG1, 0); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); INC_SIZE(2); inst[0] = JNE_i8; - inst[1] = (sljit_u8)-19; + inst[1] = (sljit_u8)-21; + local_size &= 0xfff; } - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); + if (local_size > 0) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); } #endif /* _WIN64 */ - if (local_size > 0) { - FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); - } + if (local_size > 0) + BINARY_IMM32(SUB, local_size, SLJIT_SP, 0); #ifdef _WIN64 if (saved_float_regs_size > 0) { diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index 31244fa..c7dd9be 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -79,7 +79,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { #define CHECK_EXTRA_REGS(p, w, do) \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ if (p <= compiler->scratches) \ - w = compiler->saveds_offset - ((p) - SLJIT_R2) * SSIZE_OF(sw); \ + w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \ else \ w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \ p = SLJIT_MEM1(SLJIT_SP); \ @@ -208,6 +208,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = { #define JMP_i32 0xe9 #define JMP_rm (/* GROUP_FF */ 4 << 3) #define LEA_r_m 0x8d +#define LOOP_i8 0xe2 #define MOV_r_rm 0x8b #define MOV_r_i32 0xb8 #define MOV_rm_r 0x89 @@ -386,10 +387,12 @@ static sljit_u8 get_jump_code(sljit_uw type) return 0x85 /* jne */; case SLJIT_LESS: + case SLJIT_CARRY: case SLJIT_LESS_F64: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: + case SLJIT_NOT_CARRY: case SLJIT_GREATER_EQUAL_F64: return 0x83 /* jae */; @@ -685,17 +688,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) -static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#define BINARY_IMM32(op_imm, immw, arg, argw) \ + do { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } while (0) -static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, - sljit_u32 op_types, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + do { \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + BINARY_IMM32(op_imm, immw, arg, argw); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } \ + } while (0) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) + +#else /* !SLJIT_CONFIG_X86_64 */ + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + BINARY_IMM32(op_imm, immw, arg, argw) + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif /* SLJIT_CONFIG_X86_64 */ static sljit_s32 emit_mov(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, @@ -1551,9 +1577,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_not_with_flags(compiler, dst, dstw, src, srcw); return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); - case SLJIT_NEG: - return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); - case SLJIT_CLZ: return emit_clz(compiler, op_flags, dst, dstw, src, srcw); } @@ -1561,36 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - if (IS_HALFWORD(immw) || compiler->mode32) { \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); \ - } \ - else { \ - FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ - inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ - FAIL_IF(!inst); \ - *inst = (op_mr); \ - } - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) - -#else - -#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ - FAIL_IF(!inst); \ - *(inst + 1) |= (op_imm); - -#define BINARY_EAX_IMM(op_eax_imm, immw) \ - FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) - -#endif - static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, sljit_u32 op_types, sljit_s32 dst, sljit_sw dstw, @@ -2267,6 +2260,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_cum_binary(compiler, BINARY_OPCODE(ADC), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: + if (src1 == SLJIT_IMM && src1w == 0) + return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w); + if (!HAS_FLAGS(op)) { if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error;