JIT compiler update

This commit is contained in:
Zoltan Herczeg 2022-04-11 05:19:52 +00:00
parent 64c9baaaa4
commit e612e06b5d
17 changed files with 811 additions and 613 deletions

View File

@ -6597,7 +6597,7 @@ jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump); JUMPHERE(jump);
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS) if (HAS_VIRTUAL_REGISTERS)
{ {

View File

@ -156,7 +156,7 @@ extern "C" {
#define SLJIT_CONFIG_MIPS_32 1 #define SLJIT_CONFIG_MIPS_32 1
#elif defined(__mips64) #elif defined(__mips64)
#define SLJIT_CONFIG_MIPS_64 1 #define SLJIT_CONFIG_MIPS_64 1
#elif defined(__sparc__) || defined(__sparc) #elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64)
#define SLJIT_CONFIG_SPARC_32 1 #define SLJIT_CONFIG_SPARC_32 1
#elif defined(__s390x__) #elif defined(__s390x__)
#define SLJIT_CONFIG_S390X 1 #define SLJIT_CONFIG_S390X 1
@ -274,9 +274,13 @@ extern "C" {
#ifndef SLJIT_INLINE #ifndef SLJIT_INLINE
/* Inline functions. Some old compilers do not support them. */ /* Inline functions. Some old compilers do not support them. */
#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 #ifdef __SUNPRO_C
#if __SUNPRO_C < 0x560
#define SLJIT_INLINE #define SLJIT_INLINE
#else #else
#define SLJIT_INLINE inline
#endif /* __SUNPRO_C */
#else
#define SLJIT_INLINE __inline #define SLJIT_INLINE __inline
#endif #endif
#endif /* !SLJIT_INLINE */ #endif /* !SLJIT_INLINE */
@ -319,18 +323,36 @@ extern "C" {
/* Instruction cache flush. */ /* Instruction cache flush. */
/****************************/ /****************************/
/*
* TODO:
*
* clang >= 15 could be safe to enable below;
* older versions are known to abort on some targets:
* https://github.com/PhilipHazel/pcre2/issues/92
*
* Beware: Apple is known to have removed the code in iOS, so
* it will need to be exempted or it will result in broken builds.
*/
#if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
#if __has_builtin(__builtin___clear_cache) #if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
/*
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
* https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
* gcc's clear_cache builtin is broken on power and sparc
*/
#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32)
#define SLJIT_CACHE_FLUSH(from, to) \ #define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to)) __builtin___clear_cache((char*)(from), (char*)(to))
#endif
#endif /* __has_builtin(__builtin___clear_cache) */ #endif /* gcc >= 10 */
#endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */ #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */
#ifndef SLJIT_CACHE_FLUSH #ifndef SLJIT_CACHE_FLUSH
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
/* Not required to implement on archs with unified caches. */ /* Not required to implement on archs with unified caches. */
#define SLJIT_CACHE_FLUSH(from, to) #define SLJIT_CACHE_FLUSH(from, to)
@ -340,9 +362,9 @@ extern "C" {
/* Supported by all macs since Mac OS 10.5. /* Supported by all macs since Mac OS 10.5.
However, it does not work on non-jailbroken iOS devices, However, it does not work on non-jailbroken iOS devices,
although the compilation is successful. */ although the compilation is successful. */
#include <libkern/OSCacheControl.h>
#define SLJIT_CACHE_FLUSH(from, to) \ #define SLJIT_CACHE_FLUSH(from, to) \
sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from)))
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
@ -351,18 +373,6 @@ extern "C" {
ppc_cache_flush((from), (to)) ppc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
/* Android lacks __clear_cache; instead, cacheflush should be used. */
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) #elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ /* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
@ -370,14 +380,26 @@ extern "C" {
sparc_cache_flush((from), (to)) sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
#define SLJIT_CACHE_FLUSH(from, to) \
__builtin___clear_cache((char*)(from), (char*)(to))
#elif defined __ANDROID__
/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */
#include <sys/cachectl.h>
#define SLJIT_CACHE_FLUSH(from, to) \
cacheflush((long)(from), (long)(to), 0)
#elif defined _WIN32 #elif defined _WIN32
#define SLJIT_CACHE_FLUSH(from, to) \ #define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from)) FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
#else #else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */ /* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
#define SLJIT_CACHE_FLUSH(from, to) \ #define SLJIT_CACHE_FLUSH(from, to) \
__clear_cache((char*)(from), (char*)(to)) __clear_cache((char*)(from), (char*)(to))
@ -781,8 +803,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
/* CPU status flags management. */ /* CPU status flags management. */
/********************************/ /********************************/
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ #if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
|| (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \
|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)

View File

@ -130,7 +130,8 @@
#define FAST_IS_REG(reg) ((reg) <= REG_MASK) #define FAST_IS_REG(reg) ((reg) <= REG_MASK)
/* Mask for argument types. */ /* Mask for argument types. */
#define SLJIT_ARG_MASK ((1 << SLJIT_ARG_SHIFT) - 1) #define SLJIT_ARG_MASK 0x7
#define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG)
/* Jump flags. */ /* Jump flags. */
#define JUMP_LABEL 0x1 #define JUMP_LABEL 0x1
@ -541,7 +542,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_la
} }
#define SLJIT_CURRENT_FLAGS_ALL \ #define SLJIT_CURRENT_FLAGS_ALL \
(SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE) (SLJIT_CURRENT_FLAGS_32 | SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE)
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
{ {
@ -747,11 +748,11 @@ static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 fscratches, sljit_s32 word_arg_limit) static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches)
{ {
sljit_s32 word_arg_count, float_arg_count, curr_type; sljit_s32 word_arg_count, scratch_arg_end, saved_arg_count, float_arg_count, curr_type;
curr_type = (arg_types & SLJIT_ARG_MASK); curr_type = (arg_types & SLJIT_ARG_FULL_MASK);
if (curr_type >= SLJIT_ARG_TYPE_F64) { if (curr_type >= SLJIT_ARG_TYPE_F64) {
if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0) if (curr_type > SLJIT_ARG_TYPE_F32 || fscratches == 0)
@ -764,21 +765,39 @@ static sljit_s32 function_check_arguments(sljit_s32 arg_types, sljit_s32 scratch
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
word_arg_count = 0; word_arg_count = 0;
scratch_arg_end = 0;
saved_arg_count = 0;
float_arg_count = 0; float_arg_count = 0;
while (arg_types != 0 && word_arg_count + float_arg_count < 4) { while (arg_types != 0) {
if (word_arg_count + float_arg_count >= 4)
return 0;
curr_type = (arg_types & SLJIT_ARG_MASK); curr_type = (arg_types & SLJIT_ARG_MASK);
if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
if (saveds == -1 || curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_P)
return 0;
word_arg_count++;
scratch_arg_end = word_arg_count;
} else {
if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32) if (curr_type < SLJIT_ARG_TYPE_W || curr_type > SLJIT_ARG_TYPE_F32)
return 0; return 0;
if (curr_type < SLJIT_ARG_TYPE_F64) if (curr_type < SLJIT_ARG_TYPE_F64) {
word_arg_count++; word_arg_count++;
else saved_arg_count++;
} else
float_arg_count++; float_arg_count++;
}
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
} }
return (arg_types == 0 && word_arg_count <= word_arg_limit && float_arg_count <= fscratches); if (saveds == -1)
return (word_arg_count <= scratches && float_arg_count <= fscratches);
return (saved_arg_count <= saveds && scratch_arg_end <= scratches && float_arg_count <= fscratches);
} }
#define FUNCTION_CHECK_IS_REG(r) \ #define FUNCTION_CHECK_IS_REG(r) \
@ -976,7 +995,7 @@ static const char* op0_names[] = {
static const char* op1_names[] = { static const char* op1_names[] = {
"", ".u8", ".s8", ".u16", "", ".u8", ".s8", ".u16",
".s16", ".u32", ".s32", "32", ".s16", ".u32", ".s32", "32",
".p", "not", "neg", "clz", ".p", "not", "clz",
}; };
static const char* op2_names[] = { static const char* op2_names[] = {
@ -1061,7 +1080,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(compiler);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL));
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS);
CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
@ -1069,21 +1088,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64);
CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, fscratches, saveds)); CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches));
compiler->last_flags = 0; compiler->last_flags = 0;
#endif #endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " enter ret[%s", fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]);
call_arg_names[arg_types & SLJIT_ARG_MASK]);
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
if (arg_types) { if (arg_types) {
fprintf(compiler->verbose, "], args["); fprintf(compiler->verbose, "], args[");
do { do {
fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK],
(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : "");
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
if (arg_types) if (arg_types)
fprintf(compiler->verbose, ","); fprintf(compiler->verbose, ",");
@ -1091,7 +1110,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil
} }
fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
(options & SLJIT_F64_ALIGNMENT) ? " align:f64," : "", (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "",
scratches, saveds, fscratches, fsaveds, local_size); scratches, saveds, fscratches, fsaveds, local_size);
} }
#endif #endif
@ -1105,7 +1124,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(compiler);
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL));
CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS);
CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS);
CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS);
@ -1113,21 +1132,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS);
CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS);
CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE);
CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64);
CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, fscratches, saveds)); CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches));
compiler->last_flags = 0; compiler->last_flags = 0;
#endif #endif
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
if (SLJIT_UNLIKELY(!!compiler->verbose)) { if (SLJIT_UNLIKELY(!!compiler->verbose)) {
fprintf(compiler->verbose, " set_context ret[%s", fprintf(compiler->verbose, " set_context ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]);
call_arg_names[arg_types & SLJIT_ARG_MASK]);
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
if (arg_types) { if (arg_types) {
fprintf(compiler->verbose, "], args["); fprintf(compiler->verbose, "], args[");
do { do {
fprintf(compiler->verbose, "%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); fprintf(compiler->verbose, "%s%s", call_arg_names[arg_types & SLJIT_ARG_MASK],
(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? "_r" : "");
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
if (arg_types) if (arg_types)
fprintf(compiler->verbose, ","); fprintf(compiler->verbose, ",");
@ -1135,7 +1154,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi
} }
fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n",
(options & SLJIT_F64_ALIGNMENT) ? " align:f64," : "", (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "",
scratches, saveds, fscratches, fsaveds, local_size); scratches, saveds, fscratches, fsaveds, local_size);
} }
#endif #endif
@ -1251,10 +1270,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler
/* Only SLJIT_32 and SLJIT_SET_Z are allowed. */ /* Only SLJIT_32 and SLJIT_SET_Z are allowed. */
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
break; break;
case SLJIT_NEG:
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)
|| GET_FLAG_TYPE(op) == SLJIT_OVERFLOW);
break;
case SLJIT_MOV: case SLJIT_MOV:
case SLJIT_MOV_U32: case SLJIT_MOV_U32:
case SLJIT_MOV_P: case SLJIT_MOV_P:
@ -1644,14 +1659,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32)));
CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL);
CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32)); CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32));
if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) < SLJIT_JUMP) {
if ((type & 0xff) <= SLJIT_NOT_ZERO) if ((type & 0xff) <= SLJIT_NOT_ZERO)
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else else if ((compiler->last_flags & 0xff) == SLJIT_CARRY) {
CHECK_ARGUMENT((type & 0xff) == SLJIT_CARRY || (type & 0xff) == SLJIT_NOT_CARRY);
compiler->last_flags = 0;
} else
CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
|| ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW));
} }
@ -1670,7 +1687,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN)));
CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL);
CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, compiler->fscratches, compiler->scratches)); CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches));
if (type & SLJIT_CALL_RETURN) { if (type & SLJIT_CALL_RETURN) {
CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return);
@ -1777,6 +1794,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN)));
CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL);
CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches));
FUNCTION_CHECK_SRC(src, srcw); FUNCTION_CHECK_SRC(src, srcw);
if (type & SLJIT_CALL_RETURN) { if (type & SLJIT_CALL_RETURN) {
@ -1814,7 +1832,6 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32)));
CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1));
CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32
|| (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR));
CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK));
@ -1823,6 +1840,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com
CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z);
else else
CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff)
|| ((type & 0xff) == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY)
|| ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW));
FUNCTION_CHECK_DST(dst, dstw); FUNCTION_CHECK_DST(dst, dstw);

View File

@ -256,16 +256,24 @@ extern "C" {
/* The following argument type definitions are used by sljit_emit_enter, /* The following argument type definitions are used by sljit_emit_enter,
sljit_set_context, sljit_emit_call, and sljit_emit_icall functions. sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
As for sljit_emit_enter, the first integer argument is available in
SLJIT_R0, the second one in SLJIT_R1, and so on. Similarly the first
floating point argument is available in SLJIT_FR0, the second one in
SLJIT_FR1, and so on.
As for sljit_emit_call and sljit_emit_icall, the first integer argument As for sljit_emit_call and sljit_emit_icall, the first integer argument
must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on. must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
Similarly the first floating point argument must be placed into SLJIT_FR0, Similarly the first floating point argument must be placed into SLJIT_FR0,
the second one into SLJIT_FR1, and so on. the second one into SLJIT_FR1, and so on.
As for sljit_emit_enter, the integer arguments can be stored in scratch
or saved registers. The first integer argument without _R postfix is
stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer
arguments with _R postfix are placed into scratch registers. The index
of the scratch register is the count of the previous integer arguments
starting from SLJIT_R0. The floating point arguments are always placed
into SLJIT_FR0, SLJIT_FR1, and so on.
Note: if a function is called by sljit_emit_call/sljit_emit_icall and
an argument is stored in a scratch register by sljit_emit_enter,
that argument uses the same scratch register index for both
integer and floating point arguments.
Example function definition: Example function definition:
sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a, sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a,
sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d); sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d);
@ -276,23 +284,47 @@ extern "C" {
| SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4)
Short form of argument type definition: Short form of argument type definition:
SLJIT_ARGS4(F32, P, F64, 32, F32) SLJIT_ARGS4(F32, P, F64, 32, F32)
Argument passing: Argument passing:
arg_a must be placed in SLJIT_R0 arg_a must be placed in SLJIT_R0
arg_c must be placed in SLJIT_R1 arg_c must be placed in SLJIT_R1
arg_b must be placed in SLJIT_FR0 arg_b must be placed in SLJIT_FR0
arg_d must be placed in SLJIT_FR1 arg_d must be placed in SLJIT_FR1
Examples for argument processing by sljit_emit_enter:
SLJIT_ARGS4(VOID, P, 32_R, F32, W)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
SLJIT_ARGS4(VOID, W, W_R, W, W_R)
Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
SLJIT_ARGS4(VOID, F64, W, F32, W_R)
Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
Note: it is recommended to pass the scratch arguments first
followed by the saved arguments:
SLJIT_ARGS4(VOID, W_R, W_R, W, W)
Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
*/ */
/* The following flag is only allowed for the integer arguments of
sljit_emit_enter. When the flag is set, the integer argument is
stored in a scratch register instead of a saved register. */
#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
/* Void result, can only be used by SLJIT_ARG_RETURN. */ /* Void result, can only be used by SLJIT_ARG_RETURN. */
#define SLJIT_ARG_TYPE_VOID 0 #define SLJIT_ARG_TYPE_VOID 0
/* Machine word sized integer argument or result. */ /* Machine word sized integer argument or result. */
#define SLJIT_ARG_TYPE_W 1 #define SLJIT_ARG_TYPE_W 1
#define SLJIT_ARG_TYPE_W_R (SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 32 bit integer argument or result. */ /* 32 bit integer argument or result. */
#define SLJIT_ARG_TYPE_32 2 #define SLJIT_ARG_TYPE_32 2
#define SLJIT_ARG_TYPE_32_R (SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
/* Pointer sized integer argument or result. */ /* Pointer sized integer argument or result. */
#define SLJIT_ARG_TYPE_P 3 #define SLJIT_ARG_TYPE_P 3
#define SLJIT_ARG_TYPE_P_R (SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
/* 64 bit floating point argument or result. */ /* 64 bit floating point argument or result. */
#define SLJIT_ARG_TYPE_F64 4 #define SLJIT_ARG_TYPE_F64 4
/* 32 bit floating point argument or result. */ /* 32 bit floating point argument or result. */
@ -415,8 +447,7 @@ struct sljit_compiler {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 args_size; sljit_s32 args_size;
sljit_s32 locals_offset; sljit_s32 locals_offset;
sljit_s32 saveds_offset; sljit_s32 scratches_offset;
sljit_s32 stack_tmp_size;
#endif #endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@ -652,9 +683,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
overwrites the previous context. overwrites the previous context.
*/ */
/* The absolute address returned by sljit_get_local_base with /* The compiled function uses cdecl calling
offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ * convention instead of SLJIT_FUNC. */
#define SLJIT_F64_ALIGNMENT 0x00000001 #define SLJIT_ENTER_CDECL 0x00000001
/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
#define SLJIT_MAX_LOCAL_SIZE 65536 #define SLJIT_MAX_LOCAL_SIZE 65536
@ -967,14 +998,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
Note: immediate source argument is not supported */ Note: immediate source argument is not supported */
#define SLJIT_NOT (SLJIT_OP1_BASE + 9) #define SLJIT_NOT (SLJIT_OP1_BASE + 9)
#define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32) #define SLJIT_NOT32 (SLJIT_NOT | SLJIT_32)
/* Flags: Z | OVERFLOW
Note: immediate source argument is not supported */
#define SLJIT_NEG (SLJIT_OP1_BASE + 10)
#define SLJIT_NEG32 (SLJIT_NEG | SLJIT_32)
/* Count leading zeroes /* Count leading zeroes
Flags: - (may destroy flags) Flags: - (may destroy flags)
Note: immediate source argument is not supported */ Note: immediate source argument is not supported */
#define SLJIT_CLZ (SLJIT_OP1_BASE + 11) #define SLJIT_CLZ (SLJIT_OP1_BASE + 10)
#define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32) #define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_32)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
@ -1175,8 +1202,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW)
#define SLJIT_NOT_OVERFLOW 11 #define SLJIT_NOT_OVERFLOW 11
/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ /* Unlike other flags, sljit_emit_jump may destroy this flag. */
#define SLJIT_SET_CARRY SLJIT_SET(12) #define SLJIT_CARRY 12
#define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY)
#define SLJIT_NOT_CARRY 13
/* Floating point comparison types. */ /* Floating point comparison types. */
#define SLJIT_EQUAL_F64 14 #define SLJIT_EQUAL_F64 14
@ -1538,12 +1567,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
/* Flags were set by a 32 bit operation. */ /* Flags were set by a 32 bit operation. */
#define SLJIT_CURRENT_FLAGS_32 SLJIT_32 #define SLJIT_CURRENT_FLAGS_32 SLJIT_32
/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */ /* Flags were set by an ADD or ADDC operation. */
#define SLJIT_CURRENT_FLAGS_ADD_SUB 0x01 #define SLJIT_CURRENT_FLAGS_ADD 0x01
/* Flags were set by a SUB, SUBC, or NEG operation. */
#define SLJIT_CURRENT_FLAGS_SUB 0x02
/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode. /* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode.
Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */ Must be combined with SLJIT_CURRENT_FLAGS_SUB. */
#define SLJIT_CURRENT_FLAGS_COMPARE 0x02 #define SLJIT_CURRENT_FLAGS_COMPARE 0x04
/* Define the currently available CPU status flags. It is usually used after /* Define the currently available CPU status flags. It is usually used after
an sljit_emit_label or sljit_emit_op_custom operations to define which CPU an sljit_emit_label or sljit_emit_op_custom operations to define which CPU

View File

@ -982,6 +982,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define ALLOW_IMM 0x10 #define ALLOW_IMM 0x10
#define ALLOW_INV_IMM 0x20 #define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM 0x40
/* s/l - store/load (1 bit) /* s/l - store/load (1 bit)
u/s - signed/unsigned (1 bit) u/s - signed/unsigned (1 bit)
@ -1048,7 +1049,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_uw imm, offset; sljit_uw imm, offset;
sljit_s32 i, tmp, size, word_arg_count; sljit_s32 i, tmp, size, word_arg_count, saved_arg_count;
#ifdef __SOFTFP__ #ifdef __SOFTFP__
sljit_u32 float_arg_count; sljit_u32 float_arg_count;
#else #else
@ -1104,6 +1105,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
word_arg_count = 0; word_arg_count = 0;
saved_arg_count = 0;
#ifdef __SOFTFP__ #ifdef __SOFTFP__
SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
@ -1122,7 +1124,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP) FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++; float_arg_count++;
offset += sizeof(sljit_f64); offset += sizeof(sljit_f64) - sizeof(sljit_sw);
break; break;
case SLJIT_ARG_TYPE_F32: case SLJIT_ARG_TYPE_F32:
if (offset < 4 * sizeof(sljit_sw)) if (offset < 4 * sizeof(sljit_sw))
@ -1131,18 +1133,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP) FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++; float_arg_count++;
offset += sizeof(sljit_f32);
break; break;
default: default:
word_arg_count++;
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
saved_arg_count++;
} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
tmp = word_arg_count;
else
break;
if (offset < 4 * sizeof(sljit_sw)) if (offset < 4 * sizeof(sljit_sw))
FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - word_arg_count) | (offset >> 2))); FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
else else
FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000
| RN(SLJIT_SP) | RD(SLJIT_S0 - word_arg_count) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw))));
word_arg_count++;
offset += sizeof(sljit_sw);
break; break;
} }
offset += sizeof(sljit_sw);
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
} }
@ -1173,7 +1184,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
offset++; offset++;
break; break;
default: default:
FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - word_arg_count) | RM(SLJIT_R0 + word_arg_count))); if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
saved_arg_count++;
}
word_arg_count++; word_arg_count++;
break; break;
} }
@ -1405,7 +1420,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_ADD: case SLJIT_ADD:
SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(flags & INV_IMM));
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
@ -1417,7 +1431,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
case SLJIT_SUB: case SLJIT_SUB:
SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(flags & INV_IMM));
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN) if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2))); return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
@ -1765,15 +1778,35 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
/* We prefers register and simple consts. */ /* We prefers register and simple consts. */
sljit_s32 dst_reg; sljit_s32 dst_reg;
sljit_s32 src1_reg; sljit_s32 src1_reg;
sljit_s32 src2_reg; sljit_s32 src2_reg = 0;
sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
sljit_s32 neg_op = 0;
if (dst == TMP_REG2) if (dst == TMP_REG2)
flags |= UNUSED_RETURN; flags |= UNUSED_RETURN;
SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
src2_reg = 0; if (inp_flags & ALLOW_NEG_IMM) {
switch (GET_OPCODE(op)) {
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
neg_op = SLJIT_SUB;
break;
case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
neg_op = SLJIT_SUBC;
break;
case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
neg_op = SLJIT_ADD;
break;
case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
neg_op = SLJIT_ADDC;
break;
}
}
do { do {
if (!(inp_flags & ALLOW_IMM)) if (!(inp_flags & ALLOW_IMM))
@ -1790,17 +1823,10 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
break; break;
} }
} }
if (GET_OPCODE(op) == SLJIT_ADD) { if (neg_op != 0) {
src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w); src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w);
if (src2_reg) { if (src2_reg) {
op = SLJIT_SUB | GET_ALL_FLAGS(op); op = neg_op | GET_ALL_FLAGS(op);
break;
}
}
if (GET_OPCODE(op) == SLJIT_SUB) {
src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w);
if (src2_reg) {
op = SLJIT_ADD | GET_ALL_FLAGS(op);
break; break;
} }
} }
@ -1823,13 +1849,13 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
break; break;
} }
} }
if (GET_OPCODE(op) == SLJIT_ADD) { if (neg_op >= SLJIT_SUB) {
/* Note: additive operation (commutative). */
src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w); src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
if (src2_reg) { if (src2_reg) {
/* Note: add is commutative operation. */
src1 = src2; src1 = src2;
src1w = src2w; src1w = src2w;
op = SLJIT_SUB | GET_ALL_FLAGS(op); op = neg_op | GET_ALL_FLAGS(op);
break; break;
} }
} }
@ -2007,13 +2033,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_NOT: case SLJIT_NOT:
return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_NEG:
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);
case SLJIT_CLZ: case SLJIT_CLZ:
return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
} }
@ -2037,6 +2056,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
case SLJIT_ADDC: case SLJIT_ADDC:
case SLJIT_SUB: case SLJIT_SUB:
case SLJIT_SUBC: case SLJIT_SUBC:
return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_OR: case SLJIT_OR:
case SLJIT_XOR: case SLJIT_XOR:
return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);
@ -2126,6 +2147,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size) void *instruction, sljit_u32 size)
{ {
SLJIT_UNUSED_ARG(size);
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@ -2385,10 +2407,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
case SLJIT_NOT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64:
return 0x10000000; return 0x10000000;
case SLJIT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x20000000;
/* fallthrough */
case SLJIT_LESS: case SLJIT_LESS:
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
return 0x30000000; return 0x30000000;
case SLJIT_NOT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x30000000;
/* fallthrough */
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
case SLJIT_GREATER_EQUAL_F64: case SLJIT_GREATER_EQUAL_F64:
return 0x20000000; return 0x20000000;
@ -2414,7 +2446,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0xd0000000; return 0xd0000000;
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x10000000; return 0x10000000;
/* fallthrough */ /* fallthrough */
@ -2422,7 +2454,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x60000000; return 0x60000000;
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x00000000; return 0x00000000;
/* fallthrough */ /* fallthrough */

View File

@ -630,7 +630,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
switch (op) { switch (op) {
case SLJIT_MUL: case SLJIT_MUL:
case SLJIT_NEG:
case SLJIT_CLZ: case SLJIT_CLZ:
case SLJIT_ADDC: case SLJIT_ADDC:
case SLJIT_SUBC: case SLJIT_SUBC:
@ -645,12 +644,15 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
goto set_flags; goto set_flags;
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM) if (flags & ARG1_IMM)
break; break;
imm = -imm; imm = -imm;
/* Fall through. */ /* Fall through. */
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; if (op != SLJIT_SUB)
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (imm == 0) { if (imm == 0) {
CHECK_FLAGS(1 << 29); CHECK_FLAGS(1 << 29);
return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
@ -791,27 +793,23 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(arg1 == TMP_REG1); SLJIT_ASSERT(arg1 == TMP_REG1);
FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
break; /* Set flags. */ break; /* Set flags. */
case SLJIT_NEG:
SLJIT_ASSERT(arg1 == TMP_REG1);
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
if (flags & SET_FLAGS)
inv_bits |= 1 << 29;
return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
case SLJIT_CLZ: case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1); SLJIT_ASSERT(arg1 == TMP_REG1);
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29); CHECK_FLAGS(1 << 29);
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29); CHECK_FLAGS(1 << 29);
return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
CHECK_FLAGS(1 << 29); CHECK_FLAGS(1 << 29);
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
CHECK_FLAGS(1 << 29); CHECK_FLAGS(1 << 29);
return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
case SLJIT_MUL: case SLJIT_MUL:
@ -1012,9 +1010,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
#endif /* _WIN32 */ #endif /* _WIN32 */
tmp = 0;
while (arg_types > 0) { while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - word_arg_count) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count)));
tmp++;
}
word_arg_count++; word_arg_count++;
} }
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
@ -1452,6 +1454,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
void *instruction, sljit_u32 size) void *instruction, sljit_u32 size)
{ {
SLJIT_UNUSED_ARG(size);
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@ -1703,10 +1706,20 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
case SLJIT_NOT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64:
return 0x0; return 0x0;
case SLJIT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x3;
/* fallthrough */
case SLJIT_LESS: case SLJIT_LESS:
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
return 0x2; return 0x2;
case SLJIT_NOT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x2;
/* fallthrough */
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
case SLJIT_GREATER_EQUAL_F64: case SLJIT_GREATER_EQUAL_F64:
return 0x3; return 0x3;
@ -1732,7 +1745,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0xc; return 0xc;
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x0; return 0x0;
/* fallthrough */ /* fallthrough */
@ -1740,7 +1753,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x7; return 0x7;
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x1; return 0x1;
/* fallthrough */ /* fallthrough */
@ -1798,6 +1811,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 arg_types) sljit_s32 arg_types)
{ {
SLJIT_UNUSED_ARG(arg_types);
CHECK_ERROR_PTR(); CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@ -1880,6 +1894,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types, sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw) sljit_s32 src, sljit_sw srcw)
{ {
SLJIT_UNUSED_ARG(arg_types);
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
ADJUST_LOCAL_OFFSET(src, srcw); ADJUST_LOCAL_OFFSET(src, srcw);

View File

@ -620,7 +620,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ Although some clever things could be done here, "NOT IMM" does not worth the efforts. */
break; break;
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
nimm = NEGATE(imm); nimm = NEGATE(imm);
if (IS_2_LO_REGS(reg, dst)) { if (IS_2_LO_REGS(reg, dst)) {
if (imm <= 0x7) if (imm <= 0x7)
@ -648,13 +648,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
break; break;
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
imm = get_imm(imm); imm = get_imm(imm);
if (imm != INVALID_IMM) if (imm != INVALID_IMM)
return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
break; break;
case SLJIT_SUB: case SLJIT_SUB:
/* SUB operation can be replaced by ADD because of the negative carry flag. */ compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
if (flags & ARG1_IMM) { if (flags & ARG1_IMM) {
if (imm == 0 && IS_2_LO_REGS(reg, dst)) if (imm == 0 && IS_2_LO_REGS(reg, dst))
return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
@ -672,6 +672,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
nimm = get_imm(NEGATE(imm)); nimm = get_imm(NEGATE(imm));
if (nimm != INVALID_IMM) if (nimm != INVALID_IMM)
return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); return push_inst32(compiler, CMNI_W | RN4(reg) | nimm);
break;
} }
nimm = NEGATE(imm); nimm = NEGATE(imm);
if (IS_2_LO_REGS(reg, dst)) { if (IS_2_LO_REGS(reg, dst)) {
@ -700,6 +701,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
break; break;
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM) if (flags & ARG1_IMM)
break; break;
imm = get_imm(imm); imm = get_imm(imm);
@ -814,18 +816,19 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (IS_3_LO_REGS(dst, arg1, arg2)) if (IS_3_LO_REGS(dst, arg1, arg2))
return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS)) if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & UNUSED_RETURN) { if (flags & UNUSED_RETURN) {
if (IS_2_LO_REGS(arg1, arg2)) if (IS_2_LO_REGS(arg1, arg2))
return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
@ -835,6 +838,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2)) if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
@ -1078,7 +1082,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 size, i, tmp, word_arg_count; sljit_s32 size, i, tmp, word_arg_count, saved_arg_count;
sljit_uw offset; sljit_uw offset;
sljit_uw imm = 0; sljit_uw imm = 0;
#ifdef __SOFTFP__ #ifdef __SOFTFP__
@ -1129,6 +1133,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
word_arg_count = 0; word_arg_count = 0;
saved_arg_count = 0;
#ifdef __SOFTFP__ #ifdef __SOFTFP__
SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
@ -1147,7 +1152,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP) FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++; float_arg_count++;
offset += sizeof(sljit_f64); offset += sizeof(sljit_f64) - sizeof(sljit_sw);
break; break;
case SLJIT_ARG_TYPE_F32: case SLJIT_ARG_TYPE_F32:
if (offset < 4 * sizeof(sljit_sw)) if (offset < 4 * sizeof(sljit_sw))
@ -1156,21 +1161,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP) FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
float_arg_count++; float_arg_count++;
offset += sizeof(sljit_f32);
break; break;
default: default:
SLJIT_ASSERT(reg_map[SLJIT_S0 - word_arg_count] <= 7); word_arg_count++;
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
saved_arg_count++;
} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
tmp = word_arg_count;
else
break;
SLJIT_ASSERT(reg_map[tmp] <= 7);
if (offset < 4 * sizeof(sljit_sw)) if (offset < 4 * sizeof(sljit_sw))
FAIL_IF(push_inst16(compiler, MOV | RD3(SLJIT_S0 - word_arg_count) | (offset << 1))); FAIL_IF(push_inst16(compiler, MOV | RD3(tmp) | (offset << 1)));
else else
FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(SLJIT_S0 - word_arg_count) FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
word_arg_count++;
offset += sizeof(sljit_sw);
break; break;
} }
offset += sizeof(sljit_sw);
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
} }
@ -1201,7 +1214,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
offset++; offset++;
break; break;
default: default:
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - word_arg_count, SLJIT_R0 + word_arg_count))); if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
saved_arg_count++;
}
word_arg_count++; word_arg_count++;
break; break;
} }
@ -1602,14 +1619,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
} }
if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
compiler->skip_checks = 1;
#endif
return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
}
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
if (src & SLJIT_MEM) { if (src & SLJIT_MEM) {
@ -1982,10 +1991,20 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
case SLJIT_NOT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64:
return 0x1; return 0x1;
case SLJIT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x2;
/* fallthrough */
case SLJIT_LESS: case SLJIT_LESS:
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
return 0x3; return 0x3;
case SLJIT_NOT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
return 0x3;
/* fallthrough */
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
case SLJIT_GREATER_EQUAL_F64: case SLJIT_GREATER_EQUAL_F64:
return 0x2; return 0x2;
@ -2011,7 +2030,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0xd; return 0xd;
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x1; return 0x1;
/* fallthrough */ /* fallthrough */
@ -2019,7 +2038,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x6; return 0x6;
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return 0x0; return 0x0;
/* fallthrough */ /* fallthrough */

View File

@ -770,7 +770,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{ {
sljit_ins base; sljit_ins base;
sljit_s32 i, tmp, offset; sljit_s32 i, tmp, offset;
sljit_s32 arg_count, word_arg_count, float_arg_count; sljit_s32 arg_count, word_arg_count, saved_arg_count, float_arg_count;
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -863,6 +863,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
arg_count = 0; arg_count = 0;
word_arg_count = 0; word_arg_count = 0;
saved_arg_count = 0;
float_arg_count = 0; float_arg_count = 0;
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@ -901,13 +902,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS)); FAIL_IF(push_inst(compiler, LWC1 | base | FT(float_arg_count) | IMM(local_size + (arg_count << 2)), MOVABLE_INS));
break; break;
default: default:
if (arg_count < 4)
FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(SLJIT_S0 - word_arg_count),
DR(SLJIT_S0 - word_arg_count)));
else
FAIL_IF(push_inst(compiler, LW | base | T(SLJIT_S0 - word_arg_count) | IMM(local_size + (arg_count << 2)),
DR(SLJIT_S0 - word_arg_count)));
word_arg_count++; word_arg_count++;
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
saved_arg_count++;
} else if (word_arg_count != arg_count + 1 || arg_count == 0)
tmp = word_arg_count;
else
break;
if (arg_count < 4)
FAIL_IF(push_inst(compiler, ADDU_W | SA(4 + arg_count) | TA(0) | D(tmp), DR(tmp)));
else
FAIL_IF(push_inst(compiler, LW | base | T(tmp) | IMM(local_size + (arg_count << 2)), DR(tmp)));
break; break;
} }
arg_count++; arg_count++;
@ -934,8 +942,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS)); FAIL_IF(push_inst(compiler, MOV_S | FMT_S | FS(TMP_FREG1) | FD(SLJIT_FR0), MOVABLE_INS));
break; break;
default: default:
FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(SLJIT_S0 - word_arg_count), DR(SLJIT_S0 - word_arg_count)));
word_arg_count++; word_arg_count++;
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
saved_arg_count++;
} else if (word_arg_count != arg_count || word_arg_count <= 1)
tmp = word_arg_count;
else
break;
FAIL_IF(push_inst(compiler, ADDU_W | SA(3 + arg_count) | TA(0) | D(tmp), DR(tmp)));
break; break;
} }
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
@ -1566,10 +1583,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_NOT: case SLJIT_NOT:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_NEG:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
case SLJIT_CLZ: case SLJIT_CLZ:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
} }
@ -1604,12 +1617,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) { switch (GET_OPCODE(op)) {
case SLJIT_ADD: case SLJIT_ADD:
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB: case SLJIT_SUB:
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL: case SLJIT_MUL:
@ -1723,7 +1736,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
# define flags (sljit_u32)0 # define flags (sljit_u32)0
#else #else
sljit_u32 flags = (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64) << 21; sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21;
#endif #endif
if (src & SLJIT_MEM) { if (src & SLJIT_MEM) {
@ -1751,7 +1764,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
# define flags (sljit_u32)0 # define flags (sljit_u32)0
#else #else
sljit_u32 flags = (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW) << 21; sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21;
#endif #endif
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
@ -2053,6 +2066,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_SIG_LESS: case SLJIT_SIG_LESS:
case SLJIT_SIG_GREATER: case SLJIT_SIG_GREATER:
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
case SLJIT_CARRY:
BR_Z(OTHER_FLAG); BR_Z(OTHER_FLAG);
break; break;
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
@ -2060,6 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
case SLJIT_SIG_GREATER_EQUAL: case SLJIT_SIG_GREATER_EQUAL:
case SLJIT_SIG_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL:
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
case SLJIT_NOT_CARRY:
BR_NZ(OTHER_FLAG); BR_NZ(OTHER_FLAG);
break; break;
case SLJIT_NOT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64:
@ -2316,7 +2331,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
break; break;
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB) { if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
src_ar = OTHER_FLAG; src_ar = OTHER_FLAG;
break; break;
} }

View File

@ -86,11 +86,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
SLJIT_ASSERT(src1 == TMP_REG1); SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
/* Setting XER SO is not enough, CR SO is also needed. */
return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
case SLJIT_CLZ: case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1); SLJIT_ASSERT(src1 == TMP_REG1);
return push_inst(compiler, CNTLZW | S(src2) | A(dst)); return push_inst(compiler, CNTLZW | S(src2) | A(dst));
@ -158,7 +153,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (flags & ALT_FORM3) { if (flags & ALT_FORM3) {
/* Setting XER SO is not enough, CR SO is also needed. */ /* Setting XER SO is not enough, CR SO is also needed. */
if (src1 != TMP_ZERO)
return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
} }
if (flags & ALT_FORM4) { if (flags & ALT_FORM4) {
@ -167,11 +164,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
} }
if (!(flags & ALT_SET_FLAGS)) if (!(flags & ALT_SET_FLAGS)) {
SLJIT_ASSERT(src1 != TMP_ZERO);
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
}
if (flags & ALT_FORM5) if (flags & ALT_FORM5)
return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
if (src1 != TMP_ZERO)
return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
case SLJIT_SUBC: case SLJIT_SUBC:
return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));

View File

@ -199,19 +199,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
UN_EXTS(); UN_EXTS();
return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
case SLJIT_NEG:
SLJIT_ASSERT(src1 == TMP_REG1);
if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2)));
return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
}
UN_EXTS();
/* Setting XER SO is not enough, CR SO is also needed. */
return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
case SLJIT_CLZ: case SLJIT_CLZ:
SLJIT_ASSERT(src1 == TMP_REG1); SLJIT_ASSERT(src1 == TMP_REG1);
if (flags & ALT_FORM1) if (flags & ALT_FORM1)
@ -299,13 +286,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
if (flags & ALT_FORM3) { if (flags & ALT_FORM3) {
if (flags & ALT_SIGN_EXT) { if (flags & ALT_SIGN_EXT) {
if (src1 != TMP_ZERO) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
src1 = TMP_REG1; src1 = TMP_REG1;
}
if (src2 != TMP_ZERO) {
FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
src2 = TMP_REG2; src2 = TMP_REG2;
} }
}
/* Setting XER SO is not enough, CR SO is also needed. */ /* Setting XER SO is not enough, CR SO is also needed. */
if (src1 != TMP_ZERO)
FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)));
else
FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)));
if (flags & ALT_SIGN_EXT) if (flags & ALT_SIGN_EXT)
return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
@ -317,12 +313,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
} }
if (!(flags & ALT_SET_FLAGS)) if (!(flags & ALT_SET_FLAGS)) {
SLJIT_ASSERT(src1 != TMP_ZERO);
return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
}
BIN_EXTS(); BIN_EXTS();
if (flags & ALT_FORM5) if (flags & ALT_FORM5)
return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
if (src1 != TMP_ZERO)
return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
case SLJIT_SUBC: case SLJIT_SUBC:
BIN_EXTS(); BIN_EXTS();

View File

@ -721,6 +721,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{ {
sljit_s32 i, tmp, base, offset; sljit_s32 i, tmp, base, offset;
sljit_s32 word_arg_count = 0; sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = 0;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
sljit_s32 arg_count = 0; sljit_s32 arg_count = 0;
#endif #endif
@ -791,9 +792,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
while (arg_types > 0) { while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(SLJIT_S0 - word_arg_count) | B(SLJIT_R0 + arg_count))); do {
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = SLJIT_S0 - saved_arg_count;
saved_arg_count++;
} else if (arg_count != word_arg_count)
tmp = SLJIT_R0 + word_arg_count;
else
break;
FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + arg_count) | A(tmp) | B(SLJIT_R0 + arg_count)));
} while (0);
#else #else
FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - word_arg_count) | B(SLJIT_R0 + word_arg_count))); if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0 + word_arg_count) | A(SLJIT_S0 - saved_arg_count) | B(SLJIT_R0 + word_arg_count)));
saved_arg_count++;
}
#endif #endif
word_arg_count++; word_arg_count++;
} }
@ -1153,9 +1167,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
flags |= REG1_SOURCE; flags |= REG1_SOURCE;
} }
else if (src1 & SLJIT_IMM) { else if (src1 & SLJIT_IMM) {
src1_r = TMP_ZERO;
if (src1w != 0) {
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
src1_r = TMP_REG1; src1_r = TMP_REG1;
} }
}
else { else {
FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
src1_r = TMP_REG1; src1_r = TMP_REG1;
@ -1170,9 +1187,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
dst_r = src2_r; dst_r = src2_r;
} }
else if (src2 & SLJIT_IMM) { else if (src2 & SLJIT_IMM) {
src2_r = TMP_ZERO;
if (src2w != 0) {
FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
src2_r = sugg_src2_r; src2_r = sugg_src2_r;
} }
}
else { else {
FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2)); FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, TMP_REG2));
src2_r = sugg_src2_r; src2_r = sugg_src2_r;
@ -1277,8 +1297,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src, srcw); ADJUST_LOCAL_OFFSET(src, srcw);
op = GET_OPCODE(op); op = GET_OPCODE(op);
if ((src & SLJIT_IMM) && srcw == 0)
src = TMP_ZERO;
if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
@ -1343,9 +1361,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_NOT: case SLJIT_NOT:
return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_NEG:
return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_CLZ: case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_32) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
@ -1424,11 +1439,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src1, src1w);
ADJUST_LOCAL_OFFSET(src2, src2w); ADJUST_LOCAL_OFFSET(src2, src2w);
if ((src1 & SLJIT_IMM) && src1w == 0)
src1 = TMP_ZERO;
if ((src2 & SLJIT_IMM) && src2w == 0)
src2 = TMP_ZERO;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
if (op & SLJIT_32) { if (op & SLJIT_32) {
/* Most operations expect sign extended arguments. */ /* Most operations expect sign extended arguments. */
@ -1446,6 +1456,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) { switch (GET_OPCODE(op)) {
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
if (TEST_ADD_FORM1(op)) if (TEST_ADD_FORM1(op))
return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
@ -1503,9 +1515,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
if (dst == TMP_REG2) { if (dst == TMP_REG2) {
if (TEST_UL_IMM(src2, src2w)) { if (TEST_UL_IMM(src2, src2w)) {
@ -1567,6 +1582,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_MUL: case SLJIT_MUL:
@ -2025,12 +2041,22 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
return label; return label;
} }
static sljit_ins get_bo_bi_flags(sljit_s32 type) static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type)
{ {
switch (type) { switch (type) {
case SLJIT_NOT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
return (4 << 21) | (2 << 16);
/* fallthrough */
case SLJIT_EQUAL: case SLJIT_EQUAL:
return (12 << 21) | (2 << 16); return (12 << 21) | (2 << 16);
case SLJIT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
return (12 << 21) | (2 << 16);
/* fallthrough */
case SLJIT_NOT_EQUAL: case SLJIT_NOT_EQUAL:
return (4 << 21) | (2 << 16); return (4 << 21) | (2 << 16);
@ -2094,7 +2120,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
CHECK_ERROR_PTR(); CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_jump(compiler, type)); CHECK_PTR(check_sljit_emit_jump(compiler, type));
bo_bi_flags = get_bo_bi_flags(type & 0xff); bo_bi_flags = get_bo_bi_flags(compiler, type & 0xff);
if (!bo_bi_flags) if (!bo_bi_flags)
return NULL; return NULL;
@ -2103,6 +2129,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP); set_jump(jump, compiler, (sljit_u32)type & SLJIT_REWRITABLE_JUMP);
type &= 0xff; type &= 0xff;
if (type == SLJIT_CARRY || type == SLJIT_NOT_CARRY)
PTR_FAIL_IF(push_inst(compiler, ADDE | RC(ALT_SET_FLAGS) | D(TMP_REG1) | A(TMP_ZERO) | B(TMP_ZERO)));
/* In PPC, we don't need to touch the arguments. */ /* In PPC, we don't need to touch the arguments. */
if (type < SLJIT_JUMP) if (type < SLJIT_JUMP)
jump->flags |= IS_COND; jump->flags |= IS_COND;
@ -2227,7 +2256,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 type) sljit_s32 type)
{ {
sljit_s32 reg, invert; sljit_s32 reg, invert;
sljit_u32 cr_bit; sljit_u32 bit, from_xer;
sljit_s32 saved_op = op; sljit_s32 saved_op = op;
sljit_sw saved_dstw = dstw; sljit_sw saved_dstw = dstw;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
@ -2247,7 +2276,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1)); FAIL_IF(emit_op_mem(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG1));
invert = 0; invert = 0;
cr_bit = 0; bit = 0;
from_xer = 0;
switch (type & 0xff) { switch (type & 0xff) {
case SLJIT_LESS: case SLJIT_LESS:
@ -2261,66 +2291,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
case SLJIT_GREATER: case SLJIT_GREATER:
case SLJIT_SIG_GREATER: case SLJIT_SIG_GREATER:
cr_bit = 1; bit = 1;
break; break;
case SLJIT_LESS_EQUAL: case SLJIT_LESS_EQUAL:
case SLJIT_SIG_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL:
cr_bit = 1; bit = 1;
invert = 1; invert = 1;
break; break;
case SLJIT_EQUAL: case SLJIT_EQUAL:
cr_bit = 2; bit = 2;
break; break;
case SLJIT_NOT_EQUAL: case SLJIT_NOT_EQUAL:
cr_bit = 2; bit = 2;
invert = 1; invert = 1;
break; break;
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
cr_bit = 3; from_xer = 1;
bit = 1;
break; break;
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
cr_bit = 3; from_xer = 1;
bit = 1;
invert = 1; invert = 1;
break; break;
case SLJIT_CARRY:
from_xer = 1;
bit = 2;
invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB) != 0;
break;
case SLJIT_NOT_CARRY:
from_xer = 1;
bit = 2;
invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0;
break;
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
cr_bit = 4 + 0; bit = 4 + 0;
break; break;
case SLJIT_GREATER_EQUAL_F64: case SLJIT_GREATER_EQUAL_F64:
cr_bit = 4 + 0; bit = 4 + 0;
invert = 1; invert = 1;
break; break;
case SLJIT_GREATER_F64: case SLJIT_GREATER_F64:
cr_bit = 4 + 1; bit = 4 + 1;
break; break;
case SLJIT_LESS_EQUAL_F64: case SLJIT_LESS_EQUAL_F64:
cr_bit = 4 + 1; bit = 4 + 1;
invert = 1; invert = 1;
break; break;
case SLJIT_EQUAL_F64: case SLJIT_EQUAL_F64:
cr_bit = 4 + 2; bit = 4 + 2;
break; break;
case SLJIT_NOT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64:
cr_bit = 4 + 2; bit = 4 + 2;
invert = 1; invert = 1;
break; break;
case SLJIT_UNORDERED_F64: case SLJIT_UNORDERED_F64:
cr_bit = 4 + 3; bit = 4 + 3;
break; break;
case SLJIT_ORDERED_F64: case SLJIT_ORDERED_F64:
cr_bit = 4 + 3; bit = 4 + 3;
invert = 1; invert = 1;
break; break;
@ -2329,8 +2373,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
break; break;
} }
FAIL_IF(push_inst(compiler, MFCR | D(reg))); FAIL_IF(push_inst(compiler, (from_xer ? MFXER : MFCR) | D(reg)));
FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + cr_bit) << 11) | (31 << 6) | (31 << 1))); FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + bit) << 11) | (31 << 6) | (31 << 1)));
if (invert) if (invert)
FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));

View File

@ -198,7 +198,8 @@ static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
} }
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \ #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB) (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
/* Map the given type to a 4-bit condition code mask. */ /* Map the given type to a 4-bit condition code mask. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) { static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
@ -256,10 +257,20 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
return cc1; return cc1;
case SLJIT_NOT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
return (cc2 | cc3);
/* fallthrough */
case SLJIT_SIG_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL:
case SLJIT_LESS_EQUAL_F64: case SLJIT_LESS_EQUAL_F64:
return (cc0 | cc1); return (cc0 | cc1);
case SLJIT_CARRY:
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
return (cc0 | cc1);
/* fallthrough */
case SLJIT_SIG_GREATER: case SLJIT_SIG_GREATER:
/* Overflow is considered greater, see SLJIT_SUB. */ /* Overflow is considered greater, see SLJIT_SUB. */
return cc2 | cc3; return cc2 | cc3;
@ -1037,8 +1048,8 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
sljit_gpr src_r = tmp1; sljit_gpr src_r = tmp1;
sljit_s32 needs_move = 1; sljit_s32 needs_move = 1;
if (IS_GPR_REG(dst)) { if (FAST_IS_REG(dst)) {
dst_r = gpr(dst & REG_MASK); dst_r = gpr(dst);
if (dst == src1) if (dst == src1)
needs_move = 0; needs_move = 0;
@ -1052,7 +1063,7 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
FAIL_IF(emit_move(compiler, dst_r, src1, src1w)); FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
if (FAST_IS_REG(src2)) if (FAST_IS_REG(src2))
src_r = gpr(src2 & REG_MASK); src_r = gpr(src2);
else else
FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
@ -1065,6 +1076,21 @@ static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0)); return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
} }
/* Emits a register-register instruction that has a single source operand.
 *
 * ins   - instruction template; the register fields are OR-ed into it
 * dst   - destination operand; when it is not a plain register the result
 *         is produced in tmp0 (NOTE(review): the caller is presumably
 *         expected to store tmp0 to the real destination - confirm)
 * src1  - source operand; when it is not a plain register it is first
 *         moved into tmp1 with emit_move
 * src1w - immediate / offset belonging to src1
 *
 * Returns the result of push_inst, or leaves early through FAIL_IF when
 * emit_move reports an error. */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr target = tmp0;
	sljit_gpr source = tmp1;

	if (FAST_IS_REG(dst)) {
		target = gpr(dst);
	}

	if (!FAST_IS_REG(src1)) {
		/* Non-register sources are materialized in tmp1. */
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
	} else {
		source = gpr(src1);
	}

	/* Destination is encoded with R4A, source with R0A. */
	return push_inst(compiler, ins | R4A(target) | R0A(source));
}
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins, static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_s32 dst,
sljit_s32 src1, sljit_sw src1w, sljit_s32 src1, sljit_sw src1w,
@ -1075,12 +1101,12 @@ static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
sljit_gpr src2_r = tmp1; sljit_gpr src2_r = tmp1;
if (FAST_IS_REG(src1)) if (FAST_IS_REG(src1))
src1_r = gpr(src1 & REG_MASK); src1_r = gpr(src1);
else else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w)); FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
if (FAST_IS_REG(src2)) if (FAST_IS_REG(src2))
src2_r = gpr(src2 & REG_MASK); src2_r = gpr(src2);
else else
FAIL_IF(emit_move(compiler, tmp1, src2, src2w)); FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
@ -1101,8 +1127,8 @@ static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
sljit_gpr dst_r = tmp0; sljit_gpr dst_r = tmp0;
sljit_s32 needs_move = 1; sljit_s32 needs_move = 1;
if (IS_GPR_REG(dst)) { if (FAST_IS_REG(dst)) {
dst_r = gpr(dst & REG_MASK); dst_r = gpr(dst);
if (dst == src1) if (dst == src1)
needs_move = 0; needs_move = 0;
@ -1121,7 +1147,7 @@ static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 src1, sljit_sw src1w, sljit_s32 src1, sljit_sw src1w,
sljit_sw src2w) sljit_sw src2w)
{ {
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0; sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
sljit_gpr src_r = tmp0; sljit_gpr src_r = tmp0;
if (!FAST_IS_REG(src1)) if (!FAST_IS_REG(src1))
@ -1149,7 +1175,7 @@ static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
SLJIT_ASSERT(src2 & SLJIT_MEM); SLJIT_ASSERT(src2 & SLJIT_MEM);
if (IS_GPR_REG(dst)) { if (FAST_IS_REG(dst)) {
dst_r = gpr(dst); dst_r = gpr(dst);
if (dst == src1) if (dst == src1)
@ -1610,7 +1636,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 arg_count = 0; sljit_s32 word_arg_count = 0;
sljit_s32 offset, i, tmp; sljit_s32 offset, i, tmp;
CHECK_ERROR(); CHECK_ERROR();
@ -1659,10 +1685,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size))); FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
tmp = 0;
while (arg_types > 0) { while (arg_types > 0) {
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - arg_count), gpr(SLJIT_R0 + arg_count)))); if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
arg_count++; FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count))));
tmp++;
}
word_arg_count++;
} }
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
@ -2088,14 +2118,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
FAIL_IF(push_inst(compiler, xr(dst_r, tmp1))); FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
} }
break; break;
case SLJIT_NEG:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r)));
break;
case SLJIT_NEG32:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
FAIL_IF(push_inst(compiler, lcr(dst_r, src_r)));
break;
case SLJIT_CLZ: case SLJIT_CLZ:
if (have_eimm()) { if (have_eimm()) {
FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */ FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
@ -2149,20 +2171,6 @@ static SLJIT_INLINE int is_shift(sljit_s32 op) {
return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0; return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
} }
/* Returns 1 when the flag type encoded in op is an overflow type or one of
 * the signed comparison types (SIG_LESS, SIG_LESS_EQUAL, SIG_GREATER,
 * SIG_GREATER_EQUAL); returns 0 for every other flag type. */
static SLJIT_INLINE int sets_signed_flag(sljit_s32 op)
{
	const sljit_s32 flag_type = GET_FLAG_TYPE(op);

	return (flag_type == SLJIT_OVERFLOW
		|| flag_type == SLJIT_NOT_OVERFLOW
		|| flag_type == SLJIT_SIG_LESS
		|| flag_type == SLJIT_SIG_LESS_EQUAL
		|| flag_type == SLJIT_SIG_GREATER
		|| flag_type == SLJIT_SIG_GREATER_EQUAL) ? 1 : 0;
}
static const struct ins_forms add_forms = { static const struct ins_forms add_forms = {
0x1a00, /* ar */ 0x1a00, /* ar */
0xb9080000, /* agr */ 0xb9080000, /* agr */
@ -2267,13 +2275,14 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w, sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w) sljit_s32 src2, sljit_sw src2w)
{ {
int sets_signed = sets_signed_flag(op); sljit_s32 flag_type = GET_FLAG_TYPE(op);
int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW); int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
const struct ins_forms *forms; const struct ins_forms *forms;
sljit_ins ins; sljit_ins ins;
if (dst == (sljit_s32)tmp0 && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS; int compare_signed = flag_type >= SLJIT_SIG_LESS;
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE; compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
@ -2314,6 +2323,12 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w); return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
} }
if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
goto done;
}
if (src2 & SLJIT_IMM) { if (src2 & SLJIT_IMM) {
sljit_sw neg_src2w = -src2w; sljit_sw neg_src2w = -src2w;
@ -2663,9 +2678,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->mode = op & SLJIT_32; compiler->mode = op & SLJIT_32;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z); compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC)
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) { if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
src1 ^= src2; src1 ^= src2;
src2 ^= src1; src2 ^= src1;
@ -2678,15 +2690,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
switch (GET_OPCODE(op)) { switch (GET_OPCODE(op)) {
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w); return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w)); FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
if (dst & SLJIT_MEM) if (dst & SLJIT_MEM)
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w); return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w)); FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
if (dst & SLJIT_MEM) if (dst & SLJIT_MEM)
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32); return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);

View File

@ -88,17 +88,19 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS);
case SLJIT_ADD: case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
case SLJIT_ADDC: case SLJIT_ADDC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
case SLJIT_SUB: case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB; compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
case SLJIT_SUBC: case SLJIT_SUBC:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
case SLJIT_MUL: case SLJIT_MUL:

View File

@ -512,9 +512,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 reg_index, types; sljit_s32 reg_index, types, tmp;
sljit_u32 float_offset, args_offset; sljit_u32 float_offset, args_offset;
sljit_s32 word_arg_index, float_arg_index; sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index;
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -564,7 +564,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
args_offset = (16 + 1 + 6) * sizeof(sljit_sw); args_offset = (16 + 1 + 6) * sizeof(sljit_sw);
float_offset = 16 * sizeof(sljit_sw); float_offset = 16 * sizeof(sljit_sw);
reg_index = 24; reg_index = 24;
word_arg_index = 24; saved_arg_index = 24;
scratch_arg_index = 8 - 1;
float_arg_index = 1; float_arg_index = 1;
while (arg_types) { while (arg_types) {
@ -593,14 +594,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
float_offset += sizeof(sljit_f64); float_offset += sizeof(sljit_f64);
break; break;
default: default:
if (reg_index != word_arg_index) { scratch_arg_index++;
if (reg_index < 24 + 6)
FAIL_IF(push_inst(compiler, OR | DA(word_arg_index) | S1(0) | S2A(reg_index), word_arg_index));
else
FAIL_IF(push_inst(compiler, LDUW | DA(word_arg_index) | S1A(30) | IMM(args_offset), word_arg_index));
}
word_arg_index++; if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
tmp = saved_arg_index++;
if (tmp == reg_index)
break;
} else
tmp = scratch_arg_index;
if (reg_index < 24 + 6)
FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp));
else
FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp));
break; break;
} }
@ -1018,9 +1024,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
case SLJIT_NOT: case SLJIT_NOT:
case SLJIT_CLZ: case SLJIT_CLZ:
return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
case SLJIT_NEG:
return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
} }
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
@ -1395,10 +1398,12 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
case SLJIT_LESS: case SLJIT_LESS:
case SLJIT_GREATER_F64: /* Unordered. */ case SLJIT_GREATER_F64: /* Unordered. */
case SLJIT_CARRY:
return DA(0x5); return DA(0x5);
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
case SLJIT_LESS_EQUAL_F64: case SLJIT_LESS_EQUAL_F64:
case SLJIT_NOT_CARRY:
return DA(0xd); return DA(0xd);
case SLJIT_GREATER: case SLJIT_GREATER:
@ -1422,7 +1427,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return DA(0x2); return DA(0x2);
case SLJIT_OVERFLOW: case SLJIT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return DA(0x9); return DA(0x9);
/* fallthrough */ /* fallthrough */
@ -1430,7 +1435,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return DA(0x7); return DA(0x7);
case SLJIT_NOT_OVERFLOW: case SLJIT_NOT_OVERFLOW:
if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB)) if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
return DA(0x1); return DA(0x1);
/* fallthrough */ /* fallthrough */

View File

@ -243,13 +243,23 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
return code_ptr; return code_ptr;
} }
#define ENTER_R2_USED 0x00001
#define ENTER_R2_TO_S 0x00002
#define ENTER_R2_TO_R0 0x00004
#define ENTER_R1_TO_S 0x00008
#define ENTER_TMP_TO_R4 0x00010
#define ENTER_TMP_TO_S 0x00020
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 word_arg_count, float_arg_count, args_size, types; sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
sljit_uw size; sljit_s32 size, locals_offset, args_size, types, status;
sljit_u8 *inst; sljit_u8 *inst;
#ifdef _WIN32
sljit_s32 r2_offset = -1;
#endif
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -263,8 +273,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
types = arg_types; types = arg_types;
word_arg_count = 0; word_arg_count = 0;
saved_arg_count = 0;
float_arg_count = 0; float_arg_count = 0;
args_size = SSIZE_OF(sw); args_size = SSIZE_OF(sw);
status = 0;
while (types) { while (types) {
switch (types & SLJIT_ARG_MASK) { switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64: case SLJIT_ARG_TYPE_F64:
@ -279,12 +291,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
break; break;
default: default:
word_arg_count++; word_arg_count++;
if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
saved_arg_count++;
if (saved_arg_count == 4)
status |= ENTER_TMP_TO_S;
} else {
if (word_arg_count == 4)
status |= ENTER_TMP_TO_R4;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (word_arg_count > 2) if (word_arg_count == 3)
args_size += SSIZE_OF(sw); status |= ENTER_R2_USED;
#else
args_size += SSIZE_OF(sw);
#endif #endif
}
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL))
break;
#endif
args_size += SSIZE_OF(sw);
break; break;
} }
types >>= SLJIT_ARG_SHIFT; types >>= SLJIT_ARG_SHIFT;
@ -294,30 +320,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->args_size = args_size; compiler->args_size = args_size;
/* [esp+0] for saving temporaries and function calls. */ /* [esp+0] for saving temporaries and function calls. */
compiler->stack_tmp_size = 2 * SSIZE_OF(sw); locals_offset = 2 * SSIZE_OF(sw);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (scratches > 3) if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
compiler->stack_tmp_size = 3 * SSIZE_OF(sw); locals_offset = 4 * SSIZE_OF(sw);
#else
if (scratches >= 3)
locals_offset = 4 * SSIZE_OF(sw);
#endif #endif
compiler->saveds_offset = compiler->stack_tmp_size; compiler->scratches_offset = locals_offset;
if (scratches > 3)
compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
compiler->locals_offset = compiler->saveds_offset; if (scratches > 3)
locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
if (saveds > 3) if (saveds > 3)
compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw); locals_offset += (saveds - 3) * SSIZE_OF(sw);
if (options & SLJIT_F64_ALIGNMENT) compiler->locals_offset = locals_offset;
compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);
size = (sljit_uw)(1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)); size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
inst = (sljit_u8*)ensure_buf(compiler, 1 + size); inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
FAIL_IF(!inst); FAIL_IF(!inst);
INC_SIZE(size); INC_SIZE((sljit_uw)size);
PUSH_REG(reg_map[TMP_REG1]); PUSH_REG(reg_map[TMP_REG1]);
if (saveds > 2 || scratches > 9) if (saveds > 2 || scratches > 9)
PUSH_REG(reg_map[SLJIT_S2]); PUSH_REG(reg_map[SLJIT_S2]);
@ -326,11 +353,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (saveds > 0 || scratches > 11) if (saveds > 0 || scratches > 11)
PUSH_REG(reg_map[SLJIT_S0]); PUSH_REG(reg_map[SLJIT_S0]);
if (word_arg_count >= 4) size *= SSIZE_OF(sw);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + (sljit_s32)(size * sizeof(sljit_sw)));
if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);
size += SSIZE_OF(sw);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (!(options & SLJIT_ENTER_CDECL))
size += args_size;
#endif
local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
compiler->local_size = local_size;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (!(options & SLJIT_ENTER_CDECL))
size -= args_size;
#endif
word_arg_count = 0; word_arg_count = 0;
args_size = (sljit_s32)((size + 1) * sizeof(sljit_sw)); saved_arg_count = 0;
args_size = size;
while (arg_types) { while (arg_types) {
switch (arg_types & SLJIT_ARG_MASK) { switch (arg_types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64: case SLJIT_ARG_TYPE_F64:
@ -341,12 +386,41 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
break; break;
default: default:
word_arg_count++; word_arg_count++;
if (word_arg_count <= 3) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (word_arg_count <= 2) if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) {
if (word_arg_count == 1) {
if (status & ENTER_R2_USED) {
EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0);
} else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
status |= ENTER_R2_TO_S;
saved_arg_count++;
} else
status |= ENTER_R2_TO_R0;
} else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
status |= ENTER_R1_TO_S;
saved_arg_count++;
}
break; break;
}
#endif #endif
EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4));
if (word_arg_count <= 3) {
#ifdef _WIN32
if (word_arg_count == 3 && local_size > 4 * 4096)
r2_offset = local_size + args_size;
else
#endif
EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
}
} else {
SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S));
if (saved_arg_count <= 3)
EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
saved_arg_count++;
} }
args_size += SSIZE_OF(sw); args_size += SSIZE_OF(sw);
break; break;
@ -355,95 +429,71 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
} }
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (word_arg_count > 0) if (!(options & SLJIT_ENTER_CDECL)) {
if (status & ENTER_R2_TO_R0)
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
saved_arg_count = 0;
if (status & ENTER_R2_TO_S) {
EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
if (word_arg_count > 1) saved_arg_count++;
EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0); }
if (status & ENTER_R1_TO_S)
EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0);
}
#endif #endif
SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
#if defined(__APPLE__)
/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
if (options & SLJIT_F64_ALIGNMENT)
local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1));
else
local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif
compiler->local_size = local_size;
#ifdef _WIN32 #ifdef _WIN32
if (local_size > 0) { SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);
if (local_size > 4096) {
if (local_size <= 4 * 4096) { if (local_size <= 4 * 4096) {
if (local_size > 4096) BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096);
if (local_size > 2 * 4096) if (local_size > 2 * 4096)
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2); BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
if (local_size > 3 * 4096) if (local_size > 3 * 4096)
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
} }
else { else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst); FAIL_IF(!inst);
INC_SIZE(2); INC_SIZE(2);
inst[0] = JNE_i8; inst[0] = LOOP_i8;
inst[1] = (sljit_s8) -16; inst[1] = (sljit_u8)-16;
local_size &= 0xfff;
} }
} }
#endif
if (local_size > 0) {
BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
}
if (r2_offset != -1)
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
#else /* !_WIN32 */
SLJIT_ASSERT(local_size > 0); SLJIT_ASSERT(local_size > 0);
#if !defined(__APPLE__) BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
if (options & SLJIT_F64_ALIGNMENT) {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
/* Some space might allocated during sljit_grow_stack() above on WIN32. */ #endif /* _WIN32 */
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + SSIZE_OF(sw)));
#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
if (compiler->local_size > 1024) size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw);
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, SSIZE_OF(sw)));
#endif
inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
FAIL_IF(!inst);
INC_SIZE(6);
inst[0] = GROUP_BINARY_81;
inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
sljit_unaligned_store_sw(inst + 2, ~(SSIZE_OF(f64) - 1));
if (word_arg_count == 4)
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);
/* The real local size must be used. */
return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, SLJIT_R0, 0);
} }
#endif
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
if (word_arg_count == 4)
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -452,7 +502,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 args_size; sljit_s32 args_size, locals_offset;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
sljit_s32 word_arg_count = 0; sljit_s32 word_arg_count = 0;
#endif #endif
@ -487,34 +537,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
compiler->args_size = args_size; compiler->args_size = args_size;
/* [esp+0] for saving temporaries and function calls. */ /* [esp+0] for saving temporaries and function calls. */
compiler->stack_tmp_size = 2 * SSIZE_OF(sw); locals_offset = 2 * SSIZE_OF(sw);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (scratches > 3) if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
compiler->stack_tmp_size = 3 * SSIZE_OF(sw); locals_offset = 4 * SSIZE_OF(sw);
#else
if (scratches >= 3)
locals_offset = 4 * SSIZE_OF(sw);
#endif #endif
compiler->saveds_offset = compiler->stack_tmp_size; compiler->scratches_offset = locals_offset;
if (scratches > 3)
compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
compiler->locals_offset = compiler->saveds_offset; if (scratches > 3)
locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
if (saveds > 3) if (saveds > 3)
compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw); locals_offset += (saveds - 3) * SSIZE_OF(sw);
if (options & SLJIT_F64_ALIGNMENT) compiler->locals_offset = locals_offset;
compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);
#if defined(__APPLE__)
saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (options & SLJIT_F64_ALIGNMENT) if (!(options & SLJIT_ENTER_CDECL))
compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1)); saveds += args_size;
else
compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif #endif
compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -552,22 +602,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->args_size >= 0);
SLJIT_ASSERT(compiler->local_size > 0); SLJIT_ASSERT(compiler->local_size > 0);
#if !defined(__APPLE__) BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
if (compiler->options & SLJIT_F64_ALIGNMENT)
EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
else
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif
FAIL_IF(emit_stack_frame_release(compiler)); FAIL_IF(emit_stack_frame_release(compiler));
size = 1; size = 1;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (compiler->args_size > 0) if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL))
size = 3; size = 3;
#endif #endif
inst = (sljit_u8*)ensure_buf(compiler, 1 + size); inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
@ -576,7 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
INC_SIZE(size); INC_SIZE(size);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (compiler->args_size > 0) { if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) {
RET_I16(U8(compiler->args_size)); RET_I16(U8(compiler->args_size));
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -637,10 +678,9 @@ static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
} }
else if (stack_size > 0) { else if (stack_size > 0) {
if (word_arg_count >= 4) if (word_arg_count >= 4)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
stack_size = 0; stack_size = 0;
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
@ -725,27 +765,23 @@ static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, slji
if (word_arg_count_ptr) if (word_arg_count_ptr)
*word_arg_count_ptr = word_arg_count; *word_arg_count_ptr = word_arg_count;
if (stack_size <= compiler->stack_tmp_size) if (stack_size <= compiler->scratches_offset)
return 0; return 0;
#if defined(__APPLE__) return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf);
return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
return stack_size - compiler->stack_tmp_size;
#endif
} }
static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count) sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
{ {
sljit_s32 float_arg_count = 0; sljit_s32 float_arg_count = 0;
sljit_u8 *inst;
if (word_arg_count >= 4) if (word_arg_count >= 4)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
if (stack_size > 0) if (stack_size > 0)
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
stack_size = 0; stack_size = 0;
word_arg_count = 0; word_arg_count = 0;
@ -783,8 +819,7 @@ static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
sljit_s32 single; sljit_s32 single;
if (stack_size > 0) if (stack_size > 0)
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
@ -808,7 +843,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
sljit_sw args_size, prev_args_size, saved_regs_size; sljit_sw args_size, prev_args_size, saved_regs_size;
sljit_sw types, word_arg_count, float_arg_count; sljit_sw types, word_arg_count, float_arg_count;
sljit_sw stack_size, prev_stack_size, min_size, offset; sljit_sw stack_size, prev_stack_size, min_size, offset;
sljit_sw base_reg, word_arg4_offset; sljit_sw word_arg4_offset;
sljit_u8 r2_offset = 0; sljit_u8 r2_offset = 0;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL; sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
@ -852,7 +887,11 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
arg_types >>= SLJIT_ARG_SHIFT; arg_types >>= SLJIT_ARG_SHIFT;
} }
if (args_size <= compiler->args_size) { if (args_size <= compiler->args_size
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
&& (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call)
#endif /* SLJIT_X86_32_FASTCALL */
&& 1) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
*extra_space = fast_call ? 0 : args_size; *extra_space = fast_call ? 0 : args_size;
prev_args_size = compiler->args_size; prev_args_size = compiler->args_size;
@ -862,18 +901,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
#endif /* SLJIT_X86_32_FASTCALL */ #endif /* SLJIT_X86_32_FASTCALL */
#if !defined(__APPLE__)
if (compiler->options & SLJIT_F64_ALIGNMENT) {
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
offset = stack_size;
base_reg = SLJIT_MEM1(TMP_REG1);
} else {
#endif /* !__APPLE__ */
offset = stack_size + compiler->local_size; offset = stack_size + compiler->local_size;
base_reg = SLJIT_MEM1(SLJIT_SP);
#if !defined(__APPLE__)
}
#endif /* !__APPLE__ */
if (!(src & SLJIT_IMM) && src != SLJIT_R0) { if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
if (word_arg_count >= 1) { if (word_arg_count >= 1) {
@ -884,6 +912,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
} }
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (!(compiler->options & SLJIT_ENTER_CDECL)) {
if (!fast_call) if (!fast_call)
offset -= SSIZE_OF(sw); offset -= SSIZE_OF(sw);
@ -898,17 +927,18 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
/* In cdecl mode, at least one more word value must /* In cdecl mode, at least one more word value must
* be present on the stack before the return address. */ * be present on the stack before the return address. */
EMIT_MOV(compiler, base_reg, offset - word_arg4_offset, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0);
} }
if (fast_call) { if (fast_call) {
if (args_size < prev_args_size) { if (args_size < prev_args_size) {
EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size - SSIZE_OF(sw)); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw));
EMIT_MOV(compiler, base_reg, offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
} }
} else if (prev_args_size > 0) { } else if (prev_args_size > 0) {
EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size);
EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
}
} }
#endif /* SLJIT_X86_32_FASTCALL */ #endif /* SLJIT_X86_32_FASTCALL */
@ -916,12 +946,12 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
switch (types & SLJIT_ARG_MASK) { switch (types & SLJIT_ARG_MASK) {
case SLJIT_ARG_TYPE_F64: case SLJIT_ARG_TYPE_F64:
offset -= SSIZE_OF(f64); offset -= SSIZE_OF(f64);
FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count)); FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
float_arg_count--; float_arg_count--;
break; break;
case SLJIT_ARG_TYPE_F32: case SLJIT_ARG_TYPE_F32:
offset -= SSIZE_OF(f32); offset -= SSIZE_OF(f32);
FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count)); FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
float_arg_count--; float_arg_count--;
break; break;
default: default:
@ -936,9 +966,9 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
offset -= SSIZE_OF(sw); offset -= SSIZE_OF(sw);
if (r2_offset != 0) { if (r2_offset != 0) {
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
} else } else
EMIT_MOV(compiler, base_reg, offset, SLJIT_R0, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
break; break;
case 2: case 2:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
@ -946,15 +976,15 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
break; break;
#endif #endif
offset -= SSIZE_OF(sw); offset -= SSIZE_OF(sw);
EMIT_MOV(compiler, base_reg, offset, SLJIT_R1, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
break; break;
case 3: case 3:
offset -= SSIZE_OF(sw); offset -= SSIZE_OF(sw);
break; break;
case 4: case 4:
offset -= SSIZE_OF(sw); offset -= SSIZE_OF(sw);
EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw)); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
break; break;
} }
word_arg_count--; word_arg_count--;
@ -963,22 +993,12 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
types >>= SLJIT_ARG_SHIFT; types >>= SLJIT_ARG_SHIFT;
} }
#if !defined(__APPLE__) BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
if (compiler->options & SLJIT_F64_ALIGNMENT) {
EMIT_MOV(compiler, SLJIT_SP, 0, TMP_REG1, 0);
} else {
#endif /* !__APPLE__ */
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#if !defined(__APPLE__)
}
#endif /* !__APPLE__ */
FAIL_IF(emit_stack_frame_release(compiler)); FAIL_IF(emit_stack_frame_release(compiler));
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args_size < prev_args_size) if (args_size < prev_args_size)
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, prev_args_size - args_size));
#endif #endif
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
@ -994,56 +1014,31 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
if (word_arg_count >= 3) if (word_arg_count >= 3)
stack_size += SSIZE_OF(sw); stack_size += SSIZE_OF(sw);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
prev_args_size = compiler->args_size;
#else
prev_args_size = 0; prev_args_size = 0;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (!(compiler->options & SLJIT_ENTER_CDECL))
prev_args_size = compiler->args_size;
#endif #endif
prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
min_size = prev_stack_size + compiler->local_size; min_size = prev_stack_size + compiler->local_size;
base_reg = SLJIT_MEM1(SLJIT_SP); word_arg4_offset = compiler->scratches_offset;
word_arg4_offset = compiler->saveds_offset - SSIZE_OF(sw);
#if !defined(__APPLE__)
if (compiler->options & SLJIT_F64_ALIGNMENT) {
min_size += 2 * SSIZE_OF(sw);
if (stack_size < min_size)
stack_size = min_size;
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, stack_size - prev_stack_size));
inst = emit_x86_instruction(compiler, 1, SLJIT_SP, 0, TMP_REG1, 0);
FAIL_IF(!inst);
*inst = XCHG_r_rm;
if (src == SLJIT_MEM1(SLJIT_SP))
src = SLJIT_MEM1(TMP_REG1);
base_reg = SLJIT_MEM1(TMP_REG1);
} else {
#endif /* !__APPLE__ */
if (stack_size > min_size) { if (stack_size > min_size) {
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size - min_size));
if (src == SLJIT_MEM1(SLJIT_SP)) if (src == SLJIT_MEM1(SLJIT_SP))
srcw += stack_size - min_size; srcw += stack_size - min_size;
word_arg4_offset += stack_size - min_size; word_arg4_offset += stack_size - min_size;
} }
else else
stack_size = min_size; stack_size = min_size;
#if !defined(__APPLE__)
}
#endif /* !__APPLE__ */
if (word_arg_count >= 3) { if (word_arg_count >= 3) {
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
if (word_arg_count >= 4) if (word_arg_count >= 4)
EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, word_arg4_offset); EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
} }
if (!(src & SLJIT_IMM) && src != SLJIT_R0) { if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
@ -1184,8 +1179,8 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
if (offset == 0) if (offset == 0)
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
return emit_cum_binary(compiler, BINARY_OPCODE(ADD), BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, offset); return SLJIT_SUCCESS;
} }
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space) static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
@ -1193,8 +1188,7 @@ static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 e
/* Called when stack consumption cannot be reduced to 0. */ /* Called when stack consumption cannot be reduced to 0. */
sljit_u8 *inst; sljit_u8 *inst;
FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD), BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, extra_space));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
FAIL_IF(!inst); FAIL_IF(!inst);
@ -1321,7 +1315,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));
compiler->saveds_offset += stack_size; compiler->scratches_offset += stack_size;
compiler->locals_offset += stack_size; compiler->locals_offset += stack_size;
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
@ -1330,7 +1324,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
#endif #endif
FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
compiler->saveds_offset -= stack_size; compiler->scratches_offset -= stack_size;
compiler->locals_offset -= stack_size; compiler->locals_offset -= stack_size;
return post_call_with_args(compiler, arg_types, 0); return post_call_with_args(compiler, arg_types, 0);
@ -1340,7 +1334,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
compiler->saveds_offset += stack_size; compiler->scratches_offset += stack_size;
compiler->locals_offset += stack_size; compiler->locals_offset += stack_size;
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
@ -1349,7 +1343,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
#endif #endif
FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
compiler->saveds_offset -= stack_size; compiler->scratches_offset -= stack_size;
compiler->locals_offset -= stack_size; compiler->locals_offset -= stack_size;
return post_call_with_args(compiler, arg_types, stack_size); return post_call_with_args(compiler, arg_types, stack_size);
@ -1412,8 +1406,7 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{ {
sljit_sw size, saved_size; sljit_sw size;
sljit_s32 has_f64_aligment;
/* Don't adjust shadow stack if it isn't enabled. */ /* Don't adjust shadow stack if it isn't enabled. */
if (!cpu_has_shadow_stack()) if (!cpu_has_shadow_stack())
@ -1422,26 +1415,9 @@ static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->args_size >= 0);
SLJIT_ASSERT(compiler->local_size > 0); SLJIT_ASSERT(compiler->local_size > 0);
#if !defined(__APPLE__)
has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
has_f64_aligment = 0;
#endif
size = compiler->local_size; size = compiler->local_size;
saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
if (has_f64_aligment) {
/* mov TMP_REG1, [esp + local_size]. */
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
/* mov TMP_REG1, [TMP_REG1+ saved_size]. */
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
/* Move return address to [esp]. */
EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
size = 0;
} else
size += saved_size;
return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size); return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
} }

View File

@ -366,6 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{ {
sljit_uw size; sljit_uw size;
sljit_s32 word_arg_count = 0; sljit_s32 word_arg_count = 0;
sljit_s32 saved_arg_count = 0;
sljit_s32 saved_regs_size, tmp, i; sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64 #ifdef _WIN64
sljit_s32 saved_float_regs_size; sljit_s32 saved_float_regs_size;
@ -455,7 +456,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
break; break;
} }
#endif /* _WIN64 */ #endif /* _WIN64 */
EMIT_MOV(compiler, SLJIT_S0 - word_arg_count, 0, tmp, 0); if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
if (tmp != SLJIT_R0 + word_arg_count)
EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
} else {
EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
saved_arg_count++;
}
word_arg_count++; word_arg_count++;
} else { } else {
#ifdef _WIN64 #ifdef _WIN64
@ -483,33 +490,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
} }
else { else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0); EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096); BINARY_IMM32(SUB, 1, TMP_REG1, 0);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst); FAIL_IF(!inst);
INC_SIZE(2); INC_SIZE(2);
inst[0] = JNE_i8; inst[0] = JNE_i8;
inst[1] = (sljit_u8)-19; inst[1] = (sljit_u8)-21;
local_size &= 0xfff;
} }
if (local_size > 0)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
} }
#endif /* _WIN64 */ #endif /* _WIN64 */
if (local_size > 0) { if (local_size > 0)
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
}
#ifdef _WIN64 #ifdef _WIN64
if (saved_float_regs_size > 0) { if (saved_float_regs_size > 0) {

View File

@ -79,7 +79,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
#define CHECK_EXTRA_REGS(p, w, do) \ #define CHECK_EXTRA_REGS(p, w, do) \
if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
if (p <= compiler->scratches) \ if (p <= compiler->scratches) \
w = compiler->saveds_offset - ((p) - SLJIT_R2) * SSIZE_OF(sw); \ w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
else \ else \
w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \ w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \
p = SLJIT_MEM1(SLJIT_SP); \ p = SLJIT_MEM1(SLJIT_SP); \
@ -208,6 +208,7 @@ static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define JMP_i32 0xe9 #define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3) #define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d #define LEA_r_m 0x8d
#define LOOP_i8 0xe2
#define MOV_r_rm 0x8b #define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8 #define MOV_r_i32 0xb8
#define MOV_rm_r 0x89 #define MOV_rm_r 0x89
@ -386,10 +387,12 @@ static sljit_u8 get_jump_code(sljit_uw type)
return 0x85 /* jne */; return 0x85 /* jne */;
case SLJIT_LESS: case SLJIT_LESS:
case SLJIT_CARRY:
case SLJIT_LESS_F64: case SLJIT_LESS_F64:
return 0x82 /* jc */; return 0x82 /* jc */;
case SLJIT_GREATER_EQUAL: case SLJIT_GREATER_EQUAL:
case SLJIT_NOT_CARRY:
case SLJIT_GREATER_EQUAL_F64: case SLJIT_GREATER_EQUAL_F64:
return 0x83 /* jae */; return 0x83 /* jae */;
@ -685,17 +688,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode)) #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, #define BINARY_IMM32(op_imm, immw, arg, argw) \
sljit_u32 op_types, do { \
sljit_s32 dst, sljit_sw dstw, inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
sljit_s32 src1, sljit_sw src1w, FAIL_IF(!inst); \
sljit_s32 src2, sljit_sw src2w); *(inst + 1) |= (op_imm); \
} while (0)
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler, #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw, #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
sljit_s32 src1, sljit_sw src1w, do { \
sljit_s32 src2, sljit_sw src2w); if (IS_HALFWORD(immw) || compiler->mode32) { \
BINARY_IMM32(op_imm, immw, arg, argw); \
} \
else { \
FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
FAIL_IF(!inst); \
*inst = (op_mr); \
} \
} while (0)
#define BINARY_EAX_IMM(op_eax_imm, immw) \
FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
#else /* !SLJIT_CONFIG_X86_64 */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
BINARY_IMM32(op_imm, immw, arg, argw)
#define BINARY_EAX_IMM(op_eax_imm, immw) \
FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
#endif /* SLJIT_CONFIG_X86_64 */
static sljit_s32 emit_mov(struct sljit_compiler *compiler, static sljit_s32 emit_mov(struct sljit_compiler *compiler,
sljit_s32 dst, sljit_sw dstw, sljit_s32 dst, sljit_sw dstw,
@ -1551,9 +1577,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return emit_not_with_flags(compiler, dst, dstw, src, srcw); return emit_not_with_flags(compiler, dst, dstw, src, srcw);
return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
case SLJIT_NEG:
return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
case SLJIT_CLZ: case SLJIT_CLZ:
return emit_clz(compiler, op_flags, dst, dstw, src, srcw); return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
} }
@ -1561,36 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
if (IS_HALFWORD(immw) || compiler->mode32) { \
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
FAIL_IF(!inst); \
*(inst + 1) |= (op_imm); \
} \
else { \
FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
FAIL_IF(!inst); \
*inst = (op_mr); \
}
#define BINARY_EAX_IMM(op_eax_imm, immw) \
FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
#else
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
FAIL_IF(!inst); \
*(inst + 1) |= (op_imm);
#define BINARY_EAX_IMM(op_eax_imm, immw) \
FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
#endif
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler, static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
sljit_u32 op_types, sljit_u32 op_types,
sljit_s32 dst, sljit_sw dstw, sljit_s32 dst, sljit_sw dstw,
@ -2267,6 +2260,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
return emit_cum_binary(compiler, BINARY_OPCODE(ADC), return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
dst, dstw, src1, src1w, src2, src2w); dst, dstw, src1, src1w, src2, src2w);
case SLJIT_SUB: case SLJIT_SUB:
if (src1 == SLJIT_IMM && src1w == 0)
return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
if (!HAS_FLAGS(op)) { if (!HAS_FLAGS(op)) {
if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
return compiler->error; return compiler->error;