diff --git a/Makefile.am b/Makefile.am index b1adb6f..27313d5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -452,9 +452,10 @@ EXTRA_DIST += \ src/sljit/sljitNativePPC_32.c \ src/sljit/sljitNativePPC_64.c \ src/sljit/sljitNativePPC_common.c \ + src/sljit/sljitNativeRISCV_32.c \ + src/sljit/sljitNativeRISCV_64.c \ + src/sljit/sljitNativeRISCV_common.c \ src/sljit/sljitNativeS390X.c \ - src/sljit/sljitNativeSPARC_32.c \ - src/sljit/sljitNativeSPARC_common.c \ src/sljit/sljitNativeX86_32.c \ src/sljit/sljitNativeX86_64.c \ src/sljit/sljitNativeX86_common.c \ diff --git a/src/sljit/sljitConfig.h b/src/sljit/sljitConfig.h index 1c821d2..5fba7aa 100644 --- a/src/sljit/sljitConfig.h +++ b/src/sljit/sljitConfig.h @@ -53,7 +53,8 @@ extern "C" { /* #define SLJIT_CONFIG_PPC_64 1 */ /* #define SLJIT_CONFIG_MIPS_32 1 */ /* #define SLJIT_CONFIG_MIPS_64 1 */ -/* #define SLJIT_CONFIG_SPARC_32 1 */ +/* #define SLJIT_CONFIG_RISCV_32 1 */ +/* #define SLJIT_CONFIG_RISCV_64 1 */ /* #define SLJIT_CONFIG_S390X 1 */ /* #define SLJIT_CONFIG_AUTO 1 */ @@ -127,17 +128,6 @@ extern "C" { #endif /* !SLJIT_EXECUTABLE_ALLOCATOR */ -/* Force cdecl calling convention even if a better calling - convention (e.g. fastcall) is supported by the C compiler. - If this option is disabled (this is the default), functions - called from JIT should be defined with SLJIT_FUNC attribute. - Standard C functions can still be called by using the - SLJIT_CALL_CDECL jump type. */ -#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION -/* Disabled by default */ -#define SLJIT_USE_CDECL_CALLING_CONVENTION 0 -#endif - /* Return with error when an invalid argument is passed. 
*/ #ifndef SLJIT_ARGUMENT_CHECKS /* Disabled by default */ diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index 55e4e39..63f8210 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -59,7 +59,8 @@ extern "C" { SLJIT_64BIT_ARCHITECTURE : 64 bit architecture SLJIT_LITTLE_ENDIAN : little endian architecture SLJIT_BIG_ENDIAN : big endian architecture - SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) + SLJIT_UNALIGNED : unaligned memory accesses for non-fpu operations are supported + SLJIT_FPU_UNALIGNED : unaligned memory accesses for fpu operations are supported SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information Constants: @@ -98,7 +99,8 @@ extern "C" { + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ - + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + + (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + + (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ + (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 @@ -115,7 +117,8 @@ extern "C" { && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ && !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ - && !(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + && !(defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) \ && !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) \ && !(defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) @@ -156,8 +159,10 @@ extern "C" { #define SLJIT_CONFIG_MIPS_32 1 #elif defined(__mips64) #define 
SLJIT_CONFIG_MIPS_64 1 -#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64) -#define SLJIT_CONFIG_SPARC_32 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 32) +#define SLJIT_CONFIG_RISCV_32 1 +#elif defined (__riscv_xlen) && (__riscv_xlen == 64) +#define SLJIT_CONFIG_RISCV_64 1 #elif defined(__s390x__) #define SLJIT_CONFIG_S390X 1 #else @@ -205,8 +210,8 @@ extern "C" { #define SLJIT_CONFIG_PPC 1 #elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) #define SLJIT_CONFIG_MIPS 1 -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) -#define SLJIT_CONFIG_SPARC 1 +#elif (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +#define SLJIT_CONFIG_RISCV 1 #endif /***********************************************************/ @@ -330,8 +335,14 @@ extern "C" { * older versions are known to abort in some targets * https://github.com/PhilipHazel/pcre2/issues/92 * - * beware APPLE is known to have removed the code in iOS so - * it will need to be excempted or result in broken builds + * beware some vendors (ex: Microsoft, Apple) are known to have + * removed the code to support this builtin even if the call for + * __has_builtin reports it is available. + * + * make sure linking doesn't fail because __clear_cache() is + * missing before changing it or add an exception so that the + * system provided method that should be defined below is used + * instead. 
*/ #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) #if __has_builtin(__builtin___clear_cache) && !defined(__clang__) @@ -339,9 +350,9 @@ extern "C" { /* * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248 * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811 - * gcc's clear_cache builtin for power and sparc are broken + * gcc's clear_cache builtin for power is broken */ -#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32) +#if !defined(SLJIT_CONFIG_PPC) #define SLJIT_CACHE_FLUSH(from, to) \ __builtin___clear_cache((char*)(from), (char*)(to)) #endif @@ -373,12 +384,10 @@ extern "C" { ppc_cache_flush((from), (to)) #define SLJIT_CACHE_FLUSH_OWN_IMPL 1 -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#elif defined(_WIN32) -/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ #define SLJIT_CACHE_FLUSH(from, to) \ - sparc_cache_flush((from), (to)) -#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__) @@ -392,11 +401,6 @@ extern "C" { #define SLJIT_CACHE_FLUSH(from, to) \ cacheflush((long)(from), (long)(to), 0) -#elif defined _WIN32 - -#define SLJIT_CACHE_FLUSH(from, to) \ - FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from)) - #else /* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. 
*/ @@ -435,6 +439,7 @@ typedef long int sljit_sw; && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) \ && !(defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) #define SLJIT_32BIT_ARCHITECTURE 1 #define SLJIT_WORD_SHIFT 2 @@ -495,8 +500,7 @@ typedef double sljit_f64; #if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) /* These macros are mostly useful for the applications. */ -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #ifdef __LITTLE_ENDIAN__ #define SLJIT_LITTLE_ENDIAN 1 @@ -504,8 +508,7 @@ typedef double sljit_f64; #define SLJIT_BIG_ENDIAN 1 #endif -#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ - || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) #ifdef __MIPSEL__ #define SLJIT_LITTLE_ENDIAN 1 @@ -532,8 +535,7 @@ typedef double sljit_f64; #endif /* !SLJIT_MIPS_REV */ -#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ - || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) #define SLJIT_BIG_ENDIAN 1 @@ -554,19 +556,30 @@ typedef double sljit_f64; #ifndef SLJIT_UNALIGNED -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ - || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ - || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ - || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + 
|| (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) #define SLJIT_UNALIGNED 1 #endif #endif /* !SLJIT_UNALIGNED */ +#ifndef SLJIT_FPU_UNALIGNED + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) +#define SLJIT_FPU_UNALIGNED 1 +#endif + +#endif /* !SLJIT_FPU_UNALIGNED */ + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) /* Auto detect SSE2 support using CPUID. On 64 bit x86 cpus, sse2 must be present. */ @@ -578,38 +591,7 @@ typedef double sljit_f64; /*****************************************************************************************/ #ifndef SLJIT_FUNC - -#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) \ - || !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - #define SLJIT_FUNC - -#elif defined(__GNUC__) && !defined(__APPLE__) - -#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) -#define SLJIT_FUNC __attribute__ ((fastcall)) -#define SLJIT_X86_32_FASTCALL 1 -#else -#define SLJIT_FUNC -#endif /* gcc >= 3.4 */ - -#elif defined(_MSC_VER) - -#define SLJIT_FUNC __fastcall -#define SLJIT_X86_32_FASTCALL 1 - -#elif defined(__BORLANDC__) - -#define SLJIT_FUNC __msfastcall -#define SLJIT_X86_32_FASTCALL 1 - -#else /* Unknown compiler. */ - -/* The cdecl calling convention is usually the x86 default. */ -#define SLJIT_FUNC - -#endif /* SLJIT_USE_CDECL_CALLING_CONVENTION */ - #endif /* !SLJIT_FUNC */ #ifndef SLJIT_INDIRECT_CALL @@ -624,11 +606,7 @@ typedef double sljit_f64; /* The offset which needs to be substracted from the return address to determine the next executed instruction after return. 
*/ #ifndef SLJIT_RETURN_ADDRESS_OFFSET -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define SLJIT_RETURN_ADDRESS_OFFSET 8 -#else #define SLJIT_RETURN_ADDRESS_OFFSET 0 -#endif #endif /* SLJIT_RETURN_ADDRESS_OFFSET */ /***************************************************/ @@ -740,17 +718,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8 #endif -#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) -#define SLJIT_NUMBER_OF_REGISTERS 18 -#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 -#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14 -#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -/* saved registers (16), return struct pointer (1), space for 6 argument words (1), - 4th double arg (2), double alignment (1). */ -#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw)) -#endif +#define SLJIT_NUMBER_OF_REGISTERS 23 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12 +#define SLJIT_LOCALS_OFFSET_BASE 0 +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30 +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) @@ -806,7 +780,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); #if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ || (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ - || (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) \ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) #define SLJIT_HAS_STATUS_FLAGS_STATE 1 #endif diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c index 313a061..4e25556 100644 --- a/src/sljit/sljitLir.c +++ b/src/sljit/sljitLir.c @@ -133,6 +133,9 @@ #define SLJIT_ARG_MASK 0x7 #define SLJIT_ARG_FULL_MASK (SLJIT_ARG_MASK | SLJIT_ARG_TYPE_SCRATCH_REG) 
+/* Mask for sljit_emit_enter. */ +#define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) + /* Jump flags. */ #define JUMP_LABEL 0x1 #define JUMP_ADDR 0x2 @@ -145,16 +148,16 @@ # define PATCH_MD 0x10 #endif # define TYPE_SHIFT 13 -#endif +#endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) # define IS_BL 0x4 # define PATCH_B 0x8 -#endif +#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */ #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) # define CPOOL_SIZE 512 -#endif +#endif /* SLJIT_CONFIG_ARM_V5 */ #if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) # define IS_COND 0x04 @@ -172,7 +175,7 @@ /* BL + imm24 */ # define PATCH_BL 0x60 /* 0xf00 cc code for branches */ -#endif +#endif /* SLJIT_CONFIG_ARM_THUMB2 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) # define IS_COND 0x004 @@ -182,7 +185,7 @@ # define PATCH_COND 0x040 # define PATCH_ABS48 0x080 # define PATCH_ABS64 0x100 -#endif +#endif /* SLJIT_CONFIG_ARM_64 */ #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) # define IS_COND 0x004 @@ -192,9 +195,9 @@ #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) # define PATCH_ABS32 0x040 # define PATCH_ABS48 0x080 -#endif +#endif /* SLJIT_CONFIG_PPC_64 */ # define REMOVE_COND 0x100 -#endif +#endif /* SLJIT_CONFIG_PPC */ #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # define IS_MOVABLE 0x004 @@ -212,7 +215,7 @@ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) # define PATCH_ABS32 0x400 # define PATCH_ABS48 0x800 -#endif +#endif /* SLJIT_CONFIG_MIPS_64 */ /* instruction types */ # define MOVABLE_INS 0 @@ -221,28 +224,24 @@ # define UNMOVABLE_INS 32 /* FPU status register */ # define FCSR_FCC 33 -#endif +#endif /* SLJIT_CONFIG_MIPS */ -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -# define IS_MOVABLE 0x04 -# define IS_COND 0x08 -# define IS_CALL 0x10 +#if (defined SLJIT_CONFIG_RISCV && 
SLJIT_CONFIG_RISCV) +# define IS_COND 0x004 +# define IS_CALL 0x008 -# define PATCH_B 0x20 -# define PATCH_CALL 0x40 +# define PATCH_B 0x010 +# define PATCH_J 0x020 - /* instruction types */ -# define MOVABLE_INS 0 - /* 1 - 31 last destination register */ - /* no destination (i.e: store) */ -# define UNMOVABLE_INS 32 - -# define DST_INS_MASK 0xff - - /* ICC_SET is the same as SET_FLAGS. */ -# define ICC_IS_SET (1 << 23) -# define FCC_IS_SET (1 << 24) -#endif +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) +# define PATCH_REL32 0x040 +# define PATCH_ABS32 0x080 +# define PATCH_ABS44 0x100 +# define PATCH_ABS52 0x200 +#else /* !SLJIT_CONFIG_RISCV_64 */ +# define PATCH_REL32 0x0 +#endif /* SLJIT_CONFIG_RISCV_64 */ +#endif /* SLJIT_CONFIG_RISCV */ /* Stack management. */ @@ -385,7 +384,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo invalid_integer_types); SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_32, rewritable_jump_and_single_op_must_not_be_the_same); - SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), + SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_F_EQUAL & 0x1) && !(SLJIT_JUMP & 0x1), conditional_flags_must_be_even_numbers); /* Only the non-zero members must be set. 
*/ @@ -437,10 +436,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->delay_slot = UNMOVABLE_INS; #endif -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - compiler->delay_slot = UNMOVABLE_INS; -#endif - #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->last_flags = 0; @@ -822,6 +817,9 @@ static sljit_s32 function_check_src_mem(struct sljit_compiler *compiler, sljit_s if (!(p & SLJIT_MEM)) return 0; + if (p == SLJIT_MEM1(SLJIT_SP)) + return (i >= 0 && i < compiler->logical_local_size); + if (!(!(p & REG_MASK) || FUNCTION_CHECK_IS_REG(p & REG_MASK))) return 0; @@ -859,9 +857,6 @@ static sljit_s32 function_check_src(struct sljit_compiler *compiler, sljit_s32 p if (p == SLJIT_IMM) return 1; - if (p == SLJIT_MEM1(SLJIT_SP)) - return (i >= 0 && i < compiler->logical_local_size); - return function_check_src_mem(compiler, p, i); } @@ -876,9 +871,6 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p if (FUNCTION_CHECK_IS_REG(p)) return (i == 0); - if (p == SLJIT_MEM1(SLJIT_SP)) - return (i >= 0 && i < compiler->logical_local_size); - return function_check_src_mem(compiler, p, i); } @@ -893,9 +885,6 @@ static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, s if (FUNCTION_CHECK_IS_FREG(p)) return (i == 0); - if (p == SLJIT_MEM1(SLJIT_SP)) - return (i >= 0 && i < compiler->logical_local_size); - return function_check_src_mem(compiler, p, i); } @@ -913,7 +902,11 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #ifdef _WIN64 +#ifdef __GNUC__ +# define SLJIT_PRINT_D "ll" +#else # define SLJIT_PRINT_D "I64" +#endif #else # define SLJIT_PRINT_D "l" #endif @@ -1020,10 +1013,6 @@ static const char* fop2_names[] = { "add", "sub", "mul", "div" }; -#define JUMP_POSTFIX(type) \ - ((type & 0xff) <= 
SLJIT_NOT_OVERFLOW ? ((type & SLJIT_32) ? "32" : "") \ - : ((type & 0xff) <= SLJIT_ORDERED_F64 ? ((type & SLJIT_32) ? ".f32" : ".f64") : "")) - static const char* jump_names[] = { "equal", "not_equal", "less", "greater_equal", @@ -1032,12 +1021,18 @@ static const char* jump_names[] = { "sig_greater", "sig_less_equal", "overflow", "not_overflow", "carry", "", - "equal", "not_equal", - "less", "greater_equal", - "greater", "less_equal", + "f_equal", "f_not_equal", + "f_less", "f_greater_equal", + "f_greater", "f_less_equal", "unordered", "ordered", + "ordered_equal", "unordered_or_not_equal", + "ordered_less", "unordered_or_greater_equal", + "ordered_greater", "unordered_or_less_equal", + "unordered_or_equal", "ordered_not_equal", + "unordered_or_less", "ordered_greater_equal", + "unordered_or_greater", "ordered_less_equal", "jump", "fast_call", - "call", "call.cdecl" + "call", "call_reg_arg" }; static const char* call_arg_names[] = { @@ -1053,6 +1048,8 @@ static const char* call_arg_names[] = { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +#define SLJIT_SKIP_CHECKS(compiler) (compiler)->skip_checks = 1 + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1080,7 +1077,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + } else { + CHECK_ARGUMENT(options == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); 
CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -1089,7 +1091,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); compiler->last_flags = 0; #endif @@ -1109,8 +1111,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil } while (arg_types); } - fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + fprintf(compiler->verbose, " enter:reg_arg,"); + + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + } + + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1124,7 +1134,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_ENTER_CDECL)); + if (options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT(!(options & ~(0x3 | SLJIT_ENTER_REG_ARG))); + } else { + CHECK_ARGUMENT(options == 0); + } + CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(scratches
+ saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -1133,7 +1148,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); - CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, saveds, fscratches)); + CHECK_ARGUMENT(function_check_arguments(arg_types, scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : saveds, fscratches)); compiler->last_flags = 0; #endif @@ -1153,8 +1168,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi } while (arg_types); } - fprintf(compiler->verbose, "],%s scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", - (options & SLJIT_ENTER_CDECL) ? " enter:cdecl," : "", + fprintf(compiler->verbose, "],"); + + if (options & SLJIT_ENTER_REG_ARG) { + fprintf(compiler->verbose, " enter:reg_arg,"); + + if (SLJIT_KEPT_SAVEDS_COUNT(options) > 0) + fprintf(compiler->verbose, " keep:%d,", SLJIT_KEPT_SAVEDS_COUNT(options)); + } + + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", scratches, saveds, fscratches, fsaveds, local_size); } #endif @@ -1510,7 +1533,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_32 | SLJIT_SET_Z)); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & SLJIT_32); #endif if (SLJIT_UNLIKELY(compiler->skip_checks)) { @@ -1523,7 +1546,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) - || (GET_FLAG_TYPE(op) >= SLJIT_EQUAL_F64 && GET_FLAG_TYPE(op) <= 
SLJIT_ORDERED_F64)); + || (GET_FLAG_TYPE(op) >= SLJIT_F_EQUAL && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_LESS_EQUAL)); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); #endif @@ -1531,7 +1554,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_32) ? ".f32" : ".f64"); if (op & VARIABLE_FLAG_MASK) { - fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]); + fprintf(compiler->verbose, ".%s", jump_names[GET_FLAG_TYPE(op)]); } fprintf(compiler->verbose, " "); sljit_verbose_fparam(compiler, src1, src1w); @@ -1650,6 +1673,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil CHECK_RETURN_OK; } +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#define CHECK_UNORDERED(type, last_flags) \ + ((((type) & 0xff) == SLJIT_UNORDERED || ((type) & 0xff) == SLJIT_ORDERED) && \ + ((last_flags) & 0xff) >= SLJIT_UNORDERED && ((last_flags) & 0xff) <= SLJIT_ORDERED_LESS_EQUAL) +#else +#define CHECK_UNORDERED(type, last_flags) 0 +#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) { if (SLJIT_UNLIKELY(compiler->skip_checks)) { @@ -1658,9 +1692,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_FAST_CALL); - CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_32)); if ((type & 0xff) < SLJIT_JUMP) { if ((type & 0xff) <= SLJIT_NOT_ZERO) @@ -1670,13 
+1703,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile compiler->last_flags = 0; } else CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(type, compiler->last_flags)); } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " jump%s %s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], JUMP_POSTFIX(type)); + fprintf(compiler->verbose, " jump%s %s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + jump_names[type & 0xff]); #endif CHECK_RETURN_OK; } @@ -1686,11 +1720,17 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_call(struct sljit_compile { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_CALL_RETURN))); - CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); if (type & SLJIT_CALL_RETURN) { CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1729,8 +1769,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " cmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_32) ? 
"32" : ""); + fprintf(compiler->verbose, " cmp%s%s %s, ", (type & SLJIT_32) ? "32" : "", + !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", jump_names[type & 0xff]); sljit_verbose_param(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src2, src2w); @@ -1747,15 +1787,16 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_32))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_F_EQUAL && (type & 0xff) <= SLJIT_ORDERED_LESS_EQUAL + && ((type & 0xff) <= SLJIT_ORDERED || sljit_cmp_info(type & 0xff))); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " fcmp%s %s%s, ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", - jump_names[type & 0xff], (type & SLJIT_32) ? ".f32" : ".f64"); + fprintf(compiler->verbose, " fcmp%s%s %s, ", (type & SLJIT_32) ? ".f32" : ".f64", + !(type & SLJIT_REWRITABLE_JUMP) ? 
"" : ".r", jump_names[type & 0xff]); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src2, src2w); @@ -1793,12 +1834,18 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_icall(struct sljit_compil { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_CALL_RETURN))); - CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL || (type & 0xff) == SLJIT_CALL_CDECL); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_CALL && (type & 0xff) <= SLJIT_CALL_REG_ARG); CHECK_ARGUMENT(function_check_arguments(arg_types, compiler->scratches, -1, compiler->fscratches)); FUNCTION_CHECK_SRC(src, srcw); if (type & SLJIT_CALL_RETURN) { CHECK_ARGUMENT((arg_types & SLJIT_ARG_MASK) == compiler->last_return); + + if (compiler->options & SLJIT_ENTER_REG_ARG) { + CHECK_ARGUMENT((type & 0xff) == SLJIT_CALL_REG_ARG); + } else { + CHECK_ARGUMENT((type & 0xff) != SLJIT_CALL_REG_ARG); + } } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1830,18 +1877,18 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com sljit_s32 type) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); - if ((type & 0xff) <= SLJIT_NOT_ZERO) + if (type <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else - CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) - || ((type & 0xff) == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) - || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); + CHECK_ARGUMENT(type == (compiler->last_flags & 
0xff) + || (type == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) + || (type == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(type, compiler->last_flags)); FUNCTION_CHECK_DST(dst, dstw); @@ -1850,12 +1897,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_com #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " flags%s %s%s, ", - !(op & SLJIT_SET_Z) ? "" : ".z", + fprintf(compiler->verbose, " flags.%s%s%s ", GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], - GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : "")); + GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_32) ? "32" : ""), + !(op & SLJIT_SET_Z) ? "" : ".z"); sljit_verbose_param(compiler, dst, dstw); - fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); + fprintf(compiler->verbose, ", %s\n", jump_names[type]); } #endif CHECK_RETURN_OK; @@ -1866,8 +1913,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile sljit_s32 src, sljit_sw srcw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_32))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_32)); @@ -1876,17 +1922,19 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compile CHECK_ARGUMENT(srcw == 0); } - if ((type & 0xff) <= SLJIT_NOT_ZERO) + if (type <= SLJIT_NOT_ZERO) CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); else - CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) 
- || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW)); + CHECK_ARGUMENT(type == (compiler->last_flags & 0xff) + || (type == SLJIT_NOT_CARRY && (compiler->last_flags & 0xff) == SLJIT_CARRY) + || (type == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || CHECK_UNORDERED(type, compiler->last_flags)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " cmov%s %s%s, ", + fprintf(compiler->verbose, " cmov%s %s, ", !(dst_reg & SLJIT_32) ? "" : "32", - jump_names[type & 0xff], JUMP_POSTFIX(type)); + jump_names[type]); sljit_verbose_reg(compiler, dst_reg & ~SLJIT_32); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src, srcw); @@ -1901,27 +1949,63 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_mem(struct sljit_compiler sljit_s32 mem, sljit_sw memw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + sljit_s32 allowed_flags; + CHECK_ARGUMENT((type & 0xff) >= SLJIT_MOV && (type & 0xff) <= SLJIT_MOV_P); - CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) != SLJIT_MOV && (type & 0xff) != SLJIT_MOV_U32 && (type & 0xff) != SLJIT_MOV_P)); - CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT(!(type & SLJIT_32) || ((type & 0xff) >= SLJIT_MOV_U8 && (type & 0xff) <= SLJIT_MOV_S16)); + + if (type & SLJIT_MEM_UNALIGNED) { + allowed_flags = SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32; + + switch (type & 0xff) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + allowed_flags = 0; + break; + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: + allowed_flags = SLJIT_MEM_ALIGNED_16; + break; + } + CHECK_ARGUMENT((type 
& ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | allowed_flags)) == 0); + CHECK_ARGUMENT((type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) != (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)); + } else { + CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); + } FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(reg)); - - CHECK_ARGUMENT((mem & REG_MASK) != 0 && (mem & REG_MASK) != reg); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { - if (sljit_emit_mem(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED) - fprintf(compiler->verbose, " //"); + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_mem(compiler, type | SLJIT_MEM_SUPP, reg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " // mem: unsupported form, no instructions are emitted"); + CHECK_RETURN_OK; + } + } - fprintf(compiler->verbose, " mem%s.%s%s%s ", - !(type & SLJIT_32) ? "" : "32", - (type & SLJIT_MEM_STORE) ? "st" : "ld", - op1_names[(type & 0xff) - SLJIT_OP1_BASE], - (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); + if ((type & 0xff) == SLJIT_MOV32) + fprintf(compiler->verbose, " mem32.%s", + (type & SLJIT_MEM_STORE) ? "st" : "ld"); + else + fprintf(compiler->verbose, " mem%s.%s%s", + !(type & SLJIT_32) ? "" : "32", + (type & SLJIT_MEM_STORE) ? "st" : "ld", + op1_names[(type & 0xff) - SLJIT_OP1_BASE]); + + if (type & SLJIT_MEM_UNALIGNED) { + printf(".un%s%s ", (type & SLJIT_MEM_ALIGNED_16) ? ".16" : "", (type & SLJIT_MEM_ALIGNED_32) ? 
".32" : ""); + } else + printf((type & SLJIT_MEM_PRE) ? ".pre " : ".post "); sljit_verbose_reg(compiler, reg); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, mem, memw); @@ -1937,22 +2021,37 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fmem(struct sljit_compile { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((type & 0xff) == SLJIT_MOV_F64); - CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); - CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + if (type & SLJIT_MEM_UNALIGNED) { + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | (type & SLJIT_32 ? 0 : SLJIT_MEM_ALIGNED_32))) == 0); + CHECK_ARGUMENT((type & (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) != (SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)); + } else { + CHECK_ARGUMENT((type & SLJIT_MEM_PRE) || (type & SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) != (SLJIT_MEM_PRE | SLJIT_MEM_POST)); + CHECK_ARGUMENT((type & ~(0xff | SLJIT_32 | SLJIT_MEM_STORE | SLJIT_MEM_SUPP | SLJIT_MEM_PRE | SLJIT_MEM_POST)) == 0); + } FUNCTION_CHECK_SRC_MEM(mem, memw); CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(freg)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (!(type & SLJIT_MEM_SUPP) && SLJIT_UNLIKELY(!!compiler->verbose)) { - if (sljit_emit_fmem(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED) - fprintf(compiler->verbose, " //"); + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) { + if (type & SLJIT_MEM_SUPP) + CHECK_RETURN_OK; + if (sljit_emit_fmem(compiler, type | SLJIT_MEM_SUPP, freg, mem, memw) == SLJIT_ERR_UNSUPPORTED) { + fprintf(compiler->verbose, " // fmem: unsupported form, no instructions are emitted"); + 
CHECK_RETURN_OK; + } + } - fprintf(compiler->verbose, " fmem.%s%s%s ", + fprintf(compiler->verbose, " fmem.%s%s", (type & SLJIT_MEM_STORE) ? "st" : "ld", - !(type & SLJIT_32) ? ".f64" : ".f32", - (type & SLJIT_MEM_PRE) ? ".pre" : ".post"); + !(type & SLJIT_32) ? ".f64" : ".f32"); + + if (type & SLJIT_MEM_UNALIGNED) { + printf(".un%s%s ", (type & SLJIT_MEM_ALIGNED_16) ? ".16" : "", (type & SLJIT_MEM_ALIGNED_32) ? ".32" : ""); + } else + printf((type & SLJIT_MEM_PRE) ? ".pre " : ".post "); sljit_verbose_freg(compiler, freg); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, mem, memw); @@ -2012,6 +2111,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_co CHECK_RETURN_OK; } +#else /* !SLJIT_ARGUMENT_CHECKS && !SLJIT_VERBOSE */ + +#define SLJIT_SKIP_CHECKS(compiler) + #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ #define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ @@ -2050,15 +2153,10 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp return SLJIT_SUCCESS; #endif -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ - || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); } -#if !(defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { CHECK_ERROR(); @@ -2066,19 +2164,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ - || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_return_void(compiler); } -#endif - #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ || 
(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ - || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ - || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) + || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)) \ + || (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, @@ -2088,31 +2181,55 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com struct sljit_jump *jump; sljit_s32 op = (dst_reg & SLJIT_32) ? SLJIT_MOV32 : SLJIT_MOV; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); jump = sljit_emit_jump(compiler, type ^ 0x1); FAIL_IF(!jump); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_32, 0, src, srcw)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); label = sljit_emit_label(compiler); FAIL_IF(!label); + sljit_set_label(jump, label); return SLJIT_SUCCESS; } #endif +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ + && !(defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_op1(compiler, type & (0xff | SLJIT_32), mem, memw, reg, 0); + return 
sljit_emit_op1(compiler, type & (0xff | SLJIT_32), reg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_V5 */ + +#if (!(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) || (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)) \ + && !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) + +static sljit_s32 sljit_emit_fmem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + SLJIT_SKIP_CHECKS(compiler); + + if (type & SLJIT_MEM_STORE) + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), mem, memw, freg, 0); + return sljit_emit_fop1(compiler, type & (0xff | SLJIT_32), freg, 0, mem, memw); +} + +#endif /* (!SLJIT_CONFIG_MIPS || SLJIT_MIPS_REV >= 6) && !SLJIT_CONFIG_ARM_32 */ + /* CPU description section */ #if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) @@ -2153,13 +2270,14 @@ static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *com # include "sljitNativePPC_common.c" #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) # include "sljitNativeMIPS_common.c" -#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) -# include "sljitNativeSPARC_common.c" +#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) +# include "sljitNativeRISCV_common.c" #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) # include "sljitNativeS390X.c" #endif -#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ + && !(defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, @@ -2229,20 +2347,33 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler else flags = condition << VARIABLE_FLAG_SHIFT; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) -
compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); PTR_FAIL_IF(sljit_emit_op2u(compiler, SLJIT_SUB | flags | (type & SLJIT_32), src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_32))); } -#endif +#endif /* !SLJIT_CONFIG_MIPS */ + +#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) + return 0; + + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 0; + } + + return 1; +} + +#endif /* SLJIT_CONFIG_ARM */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, @@ -2251,58 +2382,47 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_32), src1, src1w, src2, src2w); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } -#if !(defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \ - && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(reg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); - return SLJIT_ERR_UNSUPPORTED; + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); } #endif -#if !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \ + && !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \ && !(defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) { - SLJIT_UNUSED_ARG(compiler); - SLJIT_UNUSED_ARG(type); - SLJIT_UNUSED_ARG(freg); - SLJIT_UNUSED_ARG(mem); - SLJIT_UNUSED_ARG(memw); - CHECK_ERROR(); CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); - return SLJIT_ERR_UNSUPPORTED; + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + return sljit_emit_fmem_unaligned(compiler, type, freg, mem, memw); } #endif @@ -2316,10 +2436,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + + SLJIT_SKIP_CHECKS(compiler); + if (offset != 0) return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); @@ -2387,6 +2506,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) 
return 0; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return 0; +} + SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code, void *exec_allocator_data) { SLJIT_UNUSED_ARG(code); diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h index 1162658..d4414f1 100644 --- a/src/sljit/sljitLir.h +++ b/src/sljit/sljitLir.h @@ -488,8 +488,7 @@ struct sljit_compiler { sljit_uw args_size; #endif -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - sljit_s32 delay_slot; +#if (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV) sljit_s32 cache_arg; sljit_sw cache_argw; #endif @@ -634,6 +633,20 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); +/* If type is between SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL, + sljit_cmp_info returns one, if the cpu supports the passed floating + point comparison type. + + If type is SLJIT_UNORDERED or SLJIT_ORDERED, sljit_cmp_info returns + one, if the cpu supports checking the unordered comparison result + regardless of the comparison type passed to the comparison instruction. + The returned value is always one, if there is at least one type between + SLJIT_ORDERED_EQUAL and SLJIT_ORDERED_LESS_EQUAL where sljit_cmp_info + returns with a zero value. + + Otherwise it returns zero. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); + /* Instruction generation. Returns with any error code. If there is no error, they return with SLJIT_SUCCESS. */ @@ -683,9 +696,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) overwrites the previous context. */ -/* The compiled function uses cdecl calling - * convention instead of SLJIT_FUNC. 
*/ -#define SLJIT_ENTER_CDECL 0x00000001 +/* Saved registers between SLJIT_S0 and SLJIT_S(n - 1) (inclusive) + are not saved / restored on function enter / return. Instead, + these registers can be used to pass / return data (such as + global / local context pointers) across function calls. The + value of n must be between 1 and 3. Furthermore, this option + is only supported by register argument calling convention, so + SLJIT_ENTER_REG_ARG (see below) must be specified as well. */ +#define SLJIT_ENTER_KEEP(n) (n) + +/* The compiled function uses an sljit specific register argument + * calling convention. This is a lightweight function call type where + * both the caller and called function must be compiled with sljit. + * The jump type of the function call must be SLJIT_CALL_REG_ARG + * and the called function must store all arguments in registers. */ +#define SLJIT_ENTER_REG_ARG 0x00000004 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 @@ -792,8 +817,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * Write-back is supported except for one instruction: 32 bit signed load with [reg+imm] addressing mode on 64 bit. mips: [reg+imm], -65536 <= imm <= 65535 - sparc: [reg+imm], -4096 <= imm <= 4095 - [reg+reg] is supported + Write-back is not supported + riscv: [reg+imm], -2048 <= imm <= 2047 + Write-back is not supported s390x: [reg+imm], -2^19 <= imm < 2^19 [reg+reg] is supported Write-back is not supported @@ -1207,41 +1233,70 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_SET_CARRY SLJIT_SET(SLJIT_CARRY) #define SLJIT_NOT_CARRY 13 -/* Floating point comparison types. 
*/ -#define SLJIT_EQUAL_F64 14 -#define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_32) -#define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) -#define SLJIT_NOT_EQUAL_F64 15 -#define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_32) -#define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) -#define SLJIT_LESS_F64 16 -#define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_32) -#define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) -#define SLJIT_GREATER_EQUAL_F64 17 -#define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_32) -#define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) -#define SLJIT_GREATER_F64 18 -#define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_32) -#define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) -#define SLJIT_LESS_EQUAL_F64 19 -#define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_32) -#define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) -#define SLJIT_UNORDERED_F64 20 -#define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_32) -#define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) -#define SLJIT_ORDERED_F64 21 -#define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_32) -#define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) +/* Basic floating point comparison types. + + Note: when the comparison result is unordered, their behaviour is unspecified. */ + +#define SLJIT_F_EQUAL 14 +#define SLJIT_SET_F_EQUAL SLJIT_SET(SLJIT_F_EQUAL) +#define SLJIT_F_NOT_EQUAL 15 +#define SLJIT_SET_F_NOT_EQUAL SLJIT_SET(SLJIT_F_NOT_EQUAL) +#define SLJIT_F_LESS 16 +#define SLJIT_SET_F_LESS SLJIT_SET(SLJIT_F_LESS) +#define SLJIT_F_GREATER_EQUAL 17 +#define SLJIT_SET_F_GREATER_EQUAL SLJIT_SET(SLJIT_F_GREATER_EQUAL) +#define SLJIT_F_GREATER 18 +#define SLJIT_SET_F_GREATER SLJIT_SET(SLJIT_F_GREATER) +#define SLJIT_F_LESS_EQUAL 19 +#define SLJIT_SET_F_LESS_EQUAL SLJIT_SET(SLJIT_F_LESS_EQUAL) + +/* Jumps when either argument contains a NaN value. 
+#define SLJIT_UNORDERED 20 +#define SLJIT_SET_UNORDERED SLJIT_SET(SLJIT_UNORDERED) +/* Jumps when neither argument contains a NaN value. */ +#define SLJIT_ORDERED 21 +#define SLJIT_SET_ORDERED SLJIT_SET(SLJIT_ORDERED) + +/* Ordered / unordered floating point comparison types. + + Note: each comparison type has an ordered and unordered form. Some + architectures support only one of them (see: sljit_cmp_info). */ + +#define SLJIT_ORDERED_EQUAL 22 +#define SLJIT_SET_ORDERED_EQUAL SLJIT_SET(SLJIT_ORDERED_EQUAL) +#define SLJIT_UNORDERED_OR_NOT_EQUAL 23 +#define SLJIT_SET_UNORDERED_OR_NOT_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_NOT_EQUAL) +#define SLJIT_ORDERED_LESS 24 +#define SLJIT_SET_ORDERED_LESS SLJIT_SET(SLJIT_ORDERED_LESS) +#define SLJIT_UNORDERED_OR_GREATER_EQUAL 25 +#define SLJIT_SET_UNORDERED_OR_GREATER_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_GREATER_EQUAL) +#define SLJIT_ORDERED_GREATER 26 +#define SLJIT_SET_ORDERED_GREATER SLJIT_SET(SLJIT_ORDERED_GREATER) +#define SLJIT_UNORDERED_OR_LESS_EQUAL 27 +#define SLJIT_SET_UNORDERED_OR_LESS_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_LESS_EQUAL) + +#define SLJIT_UNORDERED_OR_EQUAL 28 +#define SLJIT_SET_UNORDERED_OR_EQUAL SLJIT_SET(SLJIT_UNORDERED_OR_EQUAL) +#define SLJIT_ORDERED_NOT_EQUAL 29 +#define SLJIT_SET_ORDERED_NOT_EQUAL SLJIT_SET(SLJIT_ORDERED_NOT_EQUAL) +#define SLJIT_UNORDERED_OR_LESS 30 +#define SLJIT_SET_UNORDERED_OR_LESS SLJIT_SET(SLJIT_UNORDERED_OR_LESS) +#define SLJIT_ORDERED_GREATER_EQUAL 31 +#define SLJIT_SET_ORDERED_GREATER_EQUAL SLJIT_SET(SLJIT_ORDERED_GREATER_EQUAL) +#define SLJIT_UNORDERED_OR_GREATER 32 +#define SLJIT_SET_UNORDERED_OR_GREATER SLJIT_SET(SLJIT_UNORDERED_OR_GREATER) +#define SLJIT_ORDERED_LESS_EQUAL 33 +#define SLJIT_SET_ORDERED_LESS_EQUAL SLJIT_SET(SLJIT_ORDERED_LESS_EQUAL) /* Unconditional jump types. */ -#define SLJIT_JUMP 22 +#define SLJIT_JUMP 34 /* Fast calling method. See sljit_emit_fast_enter / SLJIT_FAST_RETURN.
*/ -#define SLJIT_FAST_CALL 23 - /* Called function must be declared with the SLJIT_FUNC attribute. */ -#define SLJIT_CALL 24 - /* Called function must be declared with cdecl attribute. - This is the default attribute for C functions. */ -#define SLJIT_CALL_CDECL 25 +#define SLJIT_FAST_CALL 35 + /* Default C calling convention. */ +#define SLJIT_CALL 36 + /* Called function must be an sljit compiled function. + See SLJIT_ENTER_REG_ARG option. */ +#define SLJIT_CALL_REG_ARG 37 /* The target can be changed during runtime (see: sljit_set_jump_addr). */ #define SLJIT_REWRITABLE_JUMP 0x1000 @@ -1249,10 +1304,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi the called function returns to the caller of the current function. The stack usage is reduced before the call, but it is not necessarily reduced to zero. In the latter case the compiler needs to allocate space for some - arguments and the return register must be kept as well. - - This feature is highly experimental and not supported on SPARC platform - at the moment. */ + arguments and the return address must be stored on the stack as well. */ #define SLJIT_CALL_RETURN 0x2000 /* Emit a jump instruction. The destination is not set, only the type of the jump. @@ -1287,7 +1339,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler sljit_emit_jump. However some architectures (i.e: MIPS) may employ special optimizations here. It is suggested to use this comparison form when appropriate. - type must be between SLJIT_EQUAL_F64 and SLJIT_ORDERED_F32 + type must be between SLJIT_F_EQUAL and SLJIT_ORDERED_LESS_EQUAL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP Flags: destroy flags. 
Note: if either operand is NaN, the behaviour is undefined for @@ -1320,7 +1372,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw); /* Perform the operation using the conditional flags as the second argument. - Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value + Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL. The value represented by the type is 1, if the condition represented by the type is fulfilled, and 0 otherwise. @@ -1339,7 +1391,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if the condition is satisfied. Unlike other arithmetic operations this instruction does not support memory access. - type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64 + type must be between SLJIT_EQUAL and SLJIT_ORDERED_LESS_EQUAL dst_reg must be a valid register and it can be combined with SLJIT_32 to perform a 32 bit arithmetic operation src must be register or immediate (SLJIT_IMM) @@ -1351,32 +1403,58 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil /* The following flags are used by sljit_emit_mem() and sljit_emit_fmem(). */ +/* Memory load operation. This is the default. */ +#define SLJIT_MEM_LOAD 0x000000 +/* Memory store operation. */ +#define SLJIT_MEM_STORE 0x000200 + +/* Load or store data from an unaligned address. */ +#define SLJIT_MEM_UNALIGNED 0x000400 +/* Load or store data and update the base address with a single operation. */ +/* Base register is updated before the memory access. */ +#define SLJIT_MEM_PRE 0x000800 +/* Base register is updated after the memory access. */ +#define SLJIT_MEM_POST 0x001000 + +/* The following flags are supported when SLJIT_MEM_UNALIGNED is specified: */ + +/* Defines 16 bit alignment for unaligned accesses.
*/ +#define SLJIT_MEM_ALIGNED_16 0x010000 +/* Defines 32 bit alignment for unaligned accesses. */ +#define SLJIT_MEM_ALIGNED_32 0x020000 + +/* The following flags are supported when SLJIT_MEM_PRE or + SLJIT_MEM_POST is specified: */ + /* When SLJIT_MEM_SUPP is passed, no instructions are emitted. Instead the function returns with SLJIT_SUCCESS if the instruction form is supported and SLJIT_ERR_UNSUPPORTED otherwise. This flag allows runtime checking of available instruction forms. */ -#define SLJIT_MEM_SUPP 0x0200 -/* Memory load operation. This is the default. */ -#define SLJIT_MEM_LOAD 0x0000 -/* Memory store operation. */ -#define SLJIT_MEM_STORE 0x0400 -/* Base register is updated before the memory access. */ -#define SLJIT_MEM_PRE 0x0800 -/* Base register is updated after the memory access. */ -#define SLJIT_MEM_POST 0x1000 +#define SLJIT_MEM_SUPP 0x010000 -/* Emit a single memory load or store with update instruction. When the - requested instruction form is not supported by the CPU, it returns - with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This - allows specializing tight loops based on the supported instruction - forms (see SLJIT_MEM_SUPP flag). +/* The sljit_emit_mem emits instructions for various memory operations: + + When SLJIT_MEM_UNALIGNED is set in type argument: + Emit instructions for unaligned memory loads or stores. When + SLJIT_UNALIGNED is not defined, the only way to access unaligned + memory data is using sljit_emit_mem. Otherwise all operations (e.g. + sljit_emit_op1/2, or sljit_emit_fop1/2) support unaligned access. + In general, the performance of unaligned memory accesses is often + lower than that of aligned accesses, so they should be avoided. + + When SLJIT_MEM_PRE or SLJIT_MEM_POST is set in type argument: + Emit a single memory load or store with update instruction. + When the requested instruction form is not supported by the CPU, + it returns with SLJIT_ERR_UNSUPPORTED instead of emulating the + instruction.
This allows specializing tight loops based on + the supported instruction forms (see SLJIT_MEM_SUPP flag). type must be between SLJIT_MOV and SLJIT_MOV_P and can be - combined with SLJIT_MEM_* flags. Either SLJIT_MEM_PRE - or SLJIT_MEM_POST must be specified. + combined with SLJIT_MEM_* flags. reg is the source or destination register, and must be different from the base register of the mem operand - mem must be a SLJIT_MEM1() or SLJIT_MEM2() operand + when SLJIT_MEM_PRE or SLJIT_MEM_POST is passed + mem must be a memory operand Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, @@ -1386,9 +1464,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile /* Same as sljit_emit_mem except the followings: type must be SLJIT_MOV_F64 or SLJIT_MOV_F32 and can be - combined with SLJIT_MEM_* flags. Either SLJIT_MEM_PRE - or SLJIT_MEM_POST must be specified. - freg is the source or destination floating point register */ + combined with SLJIT_MEM_* flags. + freg is the source or destination floating point register + mem must be a memory operand + + Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, @@ -1547,7 +1627,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg); /* The following function is a helper function for sljit_emit_op_custom. It returns with the real machine register index of any SLJIT_FLOAT register. - Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ + Note: the index is always an even number on ARM-32, MIPS. 
*/ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg); diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c index 7b87f59..cd3affb 100644 --- a/src/sljit/sljitNativeARM_32.c +++ b/src/sljit/sljitNativeARM_32.c @@ -100,6 +100,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define CMP 0xe1400000 #define BKPT 0xe1200070 #define EOR 0xe0200000 +#define LDR 0xe5100000 #define MOV 0xe1a00000 #define MUL 0xe0000090 #define MVN 0xe1e00000 @@ -111,6 +112,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define RSC 0xe0e00000 #define SBC 0xe0c00000 #define SMULL 0xe0c00090 +#define STR 0xe5000000 #define SUB 0xe0400000 #define TST 0xe1000000 #define UMULL 0xe0800090 @@ -1049,7 +1051,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_uw imm, offset; - sljit_s32 i, tmp, size, word_arg_count, saved_arg_count; + sljit_s32 i, tmp, size, word_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); #ifdef __SOFTFP__ sljit_u32 float_arg_count; #else @@ -1065,7 +1068,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi imm = 0; tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) @@ -1082,7 +1085,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2))); /* Stack must be aligned to 8 bytes: */ - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { if ((size & SSIZE_OF(sw)) != 0) { @@ -1103,6 +1106,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_enter(struct sljit_compiler *compi local_size = ((size + local_size + 0x7) & ~0x7) - size; compiler->local_size = local_size; + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; saved_arg_count = 0; @@ -1148,8 +1154,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (offset < 4 * sizeof(sljit_sw)) FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2))); else - FAIL_IF(push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 - | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))); break; } @@ -1217,7 +1222,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) size += SSIZE_OF(sw); @@ -1241,6 +1246,7 @@ static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) { sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 saveds_restore_start = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); sljit_s32 lr_dst = TMP_PC; sljit_uw reg_list; @@ -1277,8 +1283,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit reg_list |= (sljit_uw)1 << reg_map[lr_dst]; tmp = SLJIT_S0 - compiler->saveds; - for (i = SLJIT_S0; i > tmp; i--) - reg_list |= (sljit_uw)1 << reg_map[i]; + if 
(saveds_restore_start != tmp) { + for (i = saveds_restore_start; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; + } else + saveds_restore_start = 0; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) reg_list |= (sljit_uw)1 << reg_map[i]; @@ -1298,16 +1307,15 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (reg_list == 0) return SLJIT_SUCCESS; - if (compiler->saveds > 0) { - SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); - lr_dst = SLJIT_S0; + if (saveds_restore_start != 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[saveds_restore_start])); + lr_dst = saveds_restore_start; } else { SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); lr_dst = SLJIT_FIRST_SAVED_REG; } - return push_inst(compiler, data_transfer_insts[WORD_SIZE | LOAD_DATA] | 0x800000 - | RN(SLJIT_SP) | RD(lr_dst) | (sljit_uw)(frame_size - 2 * SSIZE_OF(sw))); + return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(lr_dst) | (sljit_uw)(frame_size - 2 * SSIZE_OF(sw))); } if (local_size > 0) @@ -1674,23 +1682,17 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) { sljit_uw imm, offset_reg; - sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags); + sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 
0xfff : 0xff; SLJIT_ASSERT (arg & SLJIT_MEM); - SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask)); - if (!(arg & REG_MASK)) { - if (is_type1_transfer) { - FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xfff)); - argw &= 0xfff; - } - else { - FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw & ~(sljit_uw)0xff)); - argw &= 0xff; - } + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)(argw & ~mask))); + argw &= mask; return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, - is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw))); + (mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw)); } if (arg & OFFS_REG_MASK) { @@ -1698,72 +1700,53 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit arg &= REG_MASK; argw &= 0x3; - if (argw != 0 && !is_type1_transfer) { + if (argw != 0 && (mask == 0xff)) { FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7))); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); } /* Bit 25: RM is offset. */ return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | ((sljit_uw)argw << 7))); + RM(offset_reg) | (mask == 0xff ? 
0 : (1 << 25)) | ((sljit_uw)argw << 7))); } arg &= REG_MASK; - if (is_type1_transfer) { - if (argw > 0xfff) { - imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); - if (imm) { - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); - argw = argw & 0xfff; - arg = tmp_reg; - } + if (argw > mask) { + imm = get_imm((sljit_uw)(argw & ~mask)); + if (imm) { + FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); + argw = argw & mask; + arg = tmp_reg; } - else if (argw < -0xfff) { - imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xfff); - if (imm) { - FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); - argw = -(-argw & 0xfff); - arg = tmp_reg; - } - } - - if (argw >= 0 && argw <= 0xfff) - return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw)); - - if (argw < 0 && argw >= -0xfff) - return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, -argw)); } - else { - if (argw > 0xff) { - imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xff); - if (imm) { - FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm)); - argw = argw & 0xff; - arg = tmp_reg; - } + else if (argw < -mask) { + imm = get_imm((sljit_uw)(-argw & ~mask)); + if (imm) { + FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); + argw = -(-argw & mask); + arg = tmp_reg; } - else if (argw < -0xff) { - imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); - if (imm) { - FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm)); - argw = -(-argw & 0xff); - arg = tmp_reg; - } + } + + if (argw <= mask && argw >= -mask) { + if (argw >= 0) { + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw)); } - if (argw >= 0 && argw <= 0xff) - return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, TYPE2_TRANSFER_IMM(argw))); + argw = -argw; - if (argw < 0 && argw >= -0xff) { - argw = -argw; - return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, 
TYPE2_TRANSFER_IMM(argw))); - } + if (mask == 0xff) + argw = TYPE2_TRANSFER_IMM(argw); + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw)); } FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw)); return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, - RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); + RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25)))); } static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, @@ -1961,15 +1944,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile saved_reg_list[saved_reg_count++] = 1; if (saved_reg_count > 0) { - FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8) + FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); if (saved_reg_count >= 2) { SLJIT_ASSERT(saved_reg_list[1] < 8); - FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); + FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); } if (saved_reg_count >= 3) { SLJIT_ASSERT(saved_reg_list[2] < 8); - FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); + FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); } } @@ -1983,13 +1966,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile if (saved_reg_count > 0) { if (saved_reg_count >= 3) { SLJIT_ASSERT(saved_reg_list[2] < 8); - FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); + FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); } if (saved_reg_count >= 2) { SLJIT_ASSERT(saved_reg_list[1] < 8); - FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); + FAIL_IF(push_inst(compiler, LDR | 0x8d0004 
| (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); } - return push_inst(compiler, 0xe49d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) + return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8) | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } return SLJIT_SUCCESS; @@ -2091,10 +2074,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); } @@ -2370,7 +2350,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } -#undef FPU_LOAD #undef EMIT_FPU_DATA_TRANSFER /* --------------------------------------------------------------------- */ @@ -2400,11 +2379,15 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ return 0x00000000; case SLJIT_NOT_EQUAL: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
*/ return 0x10000000; case SLJIT_CARRY: @@ -2413,7 +2396,6 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_LESS: - case SLJIT_LESS_F64: return 0x30000000; case SLJIT_NOT_CARRY: @@ -2422,27 +2404,33 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: return 0x20000000; case SLJIT_GREATER: - case SLJIT_GREATER_F64: + case SLJIT_UNORDERED_OR_GREATER: return 0x80000000; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return 0x90000000; case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: return 0xb0000000; case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: return 0xa0000000; case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: return 0xc0000000; case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: return 0xd0000000; case SLJIT_OVERFLOW: @@ -2450,7 +2438,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x10000000; /* fallthrough */ - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return 0x60000000; case SLJIT_NOT_OVERFLOW: @@ -2458,11 +2446,18 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x00000000; /* fallthrough */ - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return 0x70000000; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x40000000; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x50000000; + default: - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); return 0xe0000000; } } @@ -2639,7 +2634,7 @@ static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit } FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2))); } else - FAIL_IF(push_inst(compiler, 
data_transfer_insts[WORD_SIZE] | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw)))); } break; } @@ -2718,51 +2713,48 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); - SLJIT_ASSERT((extra_space & 0x7) == 0); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); - if ((type & SLJIT_CALL_RETURN) && extra_space == 0) - type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); - jump = sljit_emit_jump(compiler, type); - PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); - if (extra_space > 0) { - if (type & SLJIT_CALL_RETURN) - PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, - TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); - PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); - - if (type & SLJIT_CALL_RETURN) { - PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); - return jump; + if (type & SLJIT_CALL_RETURN) { + 
PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2))); + return jump; + } } - } - SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); - PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); - return jump; -#else /* !__SOFTFP__ */ + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } - PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); -#endif /* __SOFTFP__ */ } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) @@ -2828,47 +2820,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi } #ifdef __SOFTFP__ - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); - SLJIT_ASSERT((extra_space & 0x7) == 0); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); - if ((type & SLJIT_CALL_RETURN) && extra_space == 0) - type = SLJIT_JUMP; + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if 
(type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, + TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); - if (extra_space > 0) { - if (type & SLJIT_CALL_RETURN) - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, - TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw)))); + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); - FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space)); + if (type & SLJIT_CALL_RETURN) + return push_inst(compiler, BX | RM(TMP_REG2)); + } - if (type & SLJIT_CALL_RETURN) - return push_inst(compiler, BX | RM(TMP_REG2)); + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); } +#endif /* __SOFTFP__ */ - SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); - return softfloat_post_call_with_args(compiler, arg_types); -#else /* !__SOFTFP__ */ if (type & SLJIT_CALL_RETURN) { FAIL_IF(emit_stack_frame_release(compiler, -1)); type = SLJIT_JUMP; } - FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); -#endif /* __SOFTFP__ */ } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, @@ -2883,7 +2872,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); dst_reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2923,7 +2912,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_32; - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { tmp = get_imm((sljit_uw)srcw); @@ -2949,6 +2938,231 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc); } +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_sw mask = max_offset >= 0x100 ? 0xfff : 0xff; +#else /* !SLJIT_CONFIG_ARM_V5 */ + sljit_sw mask = 0xfff; + + SLJIT_ASSERT(max_offset >= 0x100); +#endif /* SLJIT_CONFIG_ARM_V5 */ + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 7)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -mask) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw < 0) { + imm = get_imm((sljit_uw)(-argw & ~mask)); + + if (imm) { + *memw = -(-argw & mask); + return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm); + } + } else if ((argw & mask) <= max_offset) { + imm = get_imm((sljit_uw)(argw & ~mask)); + + if (imm) { + *memw = argw & mask; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm); + } + } else { + imm = get_imm((sljit_uw)((argw | mask) + 1)); + + if (imm) { + *memw = (argw & mask) - (mask + 1); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm); + } + } + } + + imm = (sljit_uw)(argw & ~mask); + + if ((argw & mask) > max_offset) { + imm += (sljit_uw)(mask + 1); + *memw = (argw & mask) - (mask + 1); + } else + *memw = argw & mask; + + 
FAIL_IF(load_immediate(compiler, TMP_REG1, imm)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg)); +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 flags; + sljit_s32 steps; + sljit_uw add, shift; + + switch (type & 0xff) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + flags = BYTE_SIZE; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + if ((type & 0xff) == SLJIT_MOV_S8) + flags |= SIGNED; + + return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); + + case SLJIT_MOV_U16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 1)); + flags = BYTE_SIZE; + steps = 1; + break; + + case SLJIT_MOV_S16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 1)); + flags = BYTE_SIZE | SIGNED; + steps = 1; + break; + + default: + if (type & SLJIT_MEM_ALIGNED_32) { + flags = WORD_SIZE; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1); + } + + if (!(type & SLJIT_MEM_ALIGNED_16)) { + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 3)); + flags = BYTE_SIZE; + steps = 3; + break; + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 2)); + + add = 1; + if (memw < 0) { + add = 0; + memw = -memw; + } + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, reg, mem, TYPE2_TRANSFER_IMM(memw)))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (16 << 7) | (2 << 4))); + + if (!add) { + memw -= 2; + if (memw <= 0) { + memw = -memw; + add = 1; + } + } else + memw += 2; + + return push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))); + } + + if (reg == mem) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(mem))); + mem = TMP_REG1; + } + + 
FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, reg, mem, TYPE2_TRANSFER_IMM(memw)))); + + if (!add) { + memw -= 2; + if (memw <= 0) { + memw = -memw; + add = 1; + } + } else + memw += 2; + + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); + return push_inst(compiler, ORR | RD(reg) | RN(reg) | RM(TMP_REG2) | (16 << 7)); + } + + SLJIT_ASSERT(steps > 0); + + add = 1; + if (memw < 0) { + add = 0; + memw = -memw; + } + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, reg, mem, memw))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (8 << 7) | (2 << 4))); + + while (1) { + if (!add) { + memw -= 1; + if (memw == 0) + add = 1; + } else + memw += 1; + + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, TMP_REG2, mem, memw))); + + if (--steps == 0) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(TMP_REG2) | (8 << 7) | (2 << 4))); + } + } + + if (reg == mem) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(mem))); + mem = TMP_REG1; + } + + shift = 8; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, reg, mem, memw))); + + do { + if (!add) { + memw -= 1; + if (memw == 0) + add = 1; + } else + memw += 1; + + if (steps > 1) { + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, TMP_REG2, mem, memw))); + FAIL_IF(push_inst(compiler, ORR | RD(reg) | RN(reg) | RM(TMP_REG2) | (shift << 7))); + shift += 8; + } + } while (--steps != 0); + + flags |= LOAD_DATA; + + if (flags & SIGNED) + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)))); + else + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, memw))); + + return push_inst(compiler, ORR | RD(reg) | RN(reg) | RM(TMP_REG2) | (shift << 7)); +} + +#endif /* SLJIT_CONFIG_ARM_V5 */ + SLJIT_API_FUNC_ATTRIBUTE 
sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -2959,6 +3173,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + is_type1_transfer = 1; switch (type & 0xff) { @@ -3054,6 +3271,106 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_s32 max_offset; + sljit_s32 dst; +#endif /* SLJIT_CONFIG_ARM_V5 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_ALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 
0 : FPU_LOAD), freg, mem, memw); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); + + if (type & SLJIT_32) + return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_ALIGNED_16), TMP_REG2, mem, memw); + + max_offset = 0xfff - 7; + if (type & SLJIT_MEM_ALIGNED_16) + max_offset++; + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); + mem |= SLJIT_MEM; + + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_ALIGNED_16), TMP_REG2, mem, memw)); + + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); + return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_ALIGNED_16), TMP_REG2, mem, memw + 4); + } + + max_offset = (type & SLJIT_32) ? 0xfff - 3 : 0xfff - 7; + if (type & SLJIT_MEM_ALIGNED_16) + max_offset++; + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset)); + + dst = TMP_REG1; + + /* Stack offset adjustment is not needed because dst + is not stored on the stack when mem is SLJIT_SP. 
*/ + + if (mem == TMP_REG1) { + dst = SLJIT_R3; + + if (compiler->scratches >= 4) + FAIL_IF(push_inst(compiler, STR | (1 << 21) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); + } + + mem |= SLJIT_MEM; + + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_ALIGNED_16), dst, mem, memw)); + FAIL_IF(push_inst(compiler, VMOV | VN(freg) | RD(dst))); + + if (!(type & SLJIT_32)) { + FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_ALIGNED_16), dst, mem, memw + 4)); + FAIL_IF(push_inst(compiler, VMOV | VN(freg) | 0x80 | RD(dst))); + } + + if (dst == SLJIT_R3 && compiler->scratches >= 4) + FAIL_IF(push_inst(compiler, (LDR ^ (0x1 << 24)) | (0x1 << 23) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8)); + return SLJIT_SUCCESS; +#else /* !SLJIT_CONFIG_ARM_V5 */ + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1)); +#endif /* SLJIT_CONFIG_ARM_V5 */ +} + +#undef FPU_LOAD + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, 
sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c index 96453b4..3ab060c 100644 --- a/src/sljit/sljitNativeARM_64.c +++ b/src/sljit/sljitNativeARM_64.c @@ -137,8 +137,6 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #define UDIV 0x9ac00800 #define UMULH 0x9bc03c00 -/* dest_reg is the absolute name of the register - Useful for reordering instructions in the delay slot. */ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); @@ -296,8 +294,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } next_addr = compute_next_addr(label, jump, const_, put_label); } - code_ptr ++; - word_count ++; + code_ptr++; + word_count++; } while (buf_ptr < buf_end); buf = buf->next; @@ -924,14 +922,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 prev, fprev, saved_regs_size, i, tmp; - sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_ins offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2); saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); local_size = (local_size + saved_regs_size + 0xf) & ~0xf; @@ -954,7 +952,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi prev = -1; tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - saved_arg_count; i 
> tmp; i--) { if (prev == -1) { prev = i; continue; @@ -1003,23 +1001,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (prev != -1) FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); - arg_types >>= SLJIT_ARG_SHIFT; #ifdef _WIN32 if (local_size > 4096) FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); #endif /* _WIN32 */ - tmp = 0; - while (arg_types > 0) { - if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { - if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + if (!(options & SLJIT_ENTER_REG_ARG)) { + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - saved_arg_count) | RN(TMP_ZERO) | RM(tmp))); + saved_arg_count++; + } tmp++; } - word_arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; } - arg_types >>= SLJIT_ARG_SHIFT; } #ifdef _WIN32 @@ -1100,7 +1102,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2); saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; @@ -1137,7 +1139,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) prev = -1; tmp = SLJIT_S0 - compiler->saveds; - for (i = 
SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { if (prev == -1) { prev = i; continue; @@ -1392,10 +1394,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } @@ -1550,10 +1549,9 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1); src = TMP_REG1; } else if (src & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) srcw = (sljit_s32)srcw; -#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } @@ -1699,11 +1697,15 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ return 0x1; case SLJIT_NOT_EQUAL: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
*/ return 0x0; case SLJIT_CARRY: @@ -1712,7 +1714,6 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_LESS: - case SLJIT_LESS_F64: return 0x2; case SLJIT_NOT_CARRY: @@ -1721,27 +1722,33 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: return 0x3; case SLJIT_GREATER: - case SLJIT_GREATER_F64: + case SLJIT_UNORDERED_OR_GREATER: return 0x9; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return 0x8; case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: return 0xa; case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: return 0xb; case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: return 0xd; case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: return 0xc; case SLJIT_OVERFLOW: @@ -1749,7 +1756,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x0; /* fallthrough */ - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return 0x7; case SLJIT_NOT_OVERFLOW: @@ -1757,9 +1764,16 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x1; /* fallthrough */ - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return 0x6; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x5; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x4; + default: SLJIT_UNREACHABLE(); return 0xe; @@ -1820,11 +1834,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } @@ -1914,11 +1924,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_icall(struct sljit_compiler *compi type = SLJIT_JUMP; } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); } @@ -1933,7 +1939,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { @@ -1988,7 +1994,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil srcw = 0; } - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); dst_reg &= ~SLJIT_32; return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); @@ -2003,6 +2009,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) return SLJIT_ERR_UNSUPPORTED; @@ -2057,6 +2066,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_fmem_unaligned(compiler, type, freg, mem, memw); + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) return SLJIT_ERR_UNSUPPORTED; diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c index ed21ea7..3a38554 100644 --- a/src/sljit/sljitNativeARM_T2_32.c +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -434,8 +434,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } next_addr = 
compute_next_addr(label, jump, const_, put_label); } - code_ptr ++; - half_count ++; + code_ptr++; + half_count++; } while (buf_ptr < buf_end); buf = buf->next; @@ -890,8 +890,8 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s #define HALF_SIZE 0x08 #define PRELOAD 0x0c -#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) -#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) +#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE))) +#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift)))) /* 1st letter: @@ -993,8 +993,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit sljit_uw tmp; SLJIT_ASSERT(arg & SLJIT_MEM); - SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); - arg &= ~SLJIT_MEM; + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff)); if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); @@ -1012,15 +1011,17 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; other_r = OFFS_REG(arg); - arg &= 0xf; + arg &= REG_MASK; if (!argw && IS_3_LO_REGS(reg, arg, other_r)) return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4)); } + arg &= REG_MASK; + if (argw > 0xfff) { - tmp = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff); + tmp = get_imm((sljit_uw)(argw & ~0xfff)); if (tmp != INVALID_IMM) { push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1028,7 +1029,7 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } else if (argw < -0xff) { - tmp = get_imm((sljit_uw)-argw & ~(sljit_uw)0xff); + tmp = get_imm((sljit_uw)(-argw & ~0xff)); if (tmp != INVALID_IMM) { push_inst32(compiler, SUB_WI | 
RD4(tmp_reg) | RN4(arg) | tmp); arg = tmp_reg; @@ -1036,27 +1037,28 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit } } + /* 16 bit instruction forms. */ if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { tmp = 3; if (IS_WORD_SIZE(flags)) { - if (OFFSET_CHECK(0x1f, 2)) + if (ALIGN_CHECK(argw, 0x1f, 2)) tmp = 2; } else if (flags & BYTE_SIZE) { - if (OFFSET_CHECK(0x1f, 0)) + if (ALIGN_CHECK(argw, 0x1f, 0)) tmp = 0; } else { SLJIT_ASSERT(flags & HALF_SIZE); - if (OFFSET_CHECK(0x1f, 1)) + if (ALIGN_CHECK(argw, 0x1f, 1)) tmp = 1; } if (tmp < 3) return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp))); } - else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) { + else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) { /* SP based immediate. */ return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 
0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2)); } @@ -1074,6 +1076,9 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); } +#undef ALIGN_CHECK +#undef IS_WORD_SIZE + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -1082,7 +1087,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 size, i, tmp, word_arg_count, saved_arg_count; + sljit_s32 size, i, tmp, word_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_uw offset; sljit_uw imm = 0; #ifdef __SOFTFP__ @@ -1098,7 +1104,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) imm |= (sljit_uw)1 << reg_map[i]; for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) @@ -1110,7 +1116,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi : push_inst16(compiler, PUSH | (1 << 8) | imm)); /* Stack must be aligned to 8 bytes: (LR, R4) */ - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { if ((size & SSIZE_OF(sw)) != 0) { @@ -1131,6 +1137,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size = ((size + local_size + 0x7) & ~0x7) - size; compiler->local_size = local_size; + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 
0; + arg_types >>= SLJIT_ARG_SHIFT; word_arg_count = 0; saved_arg_count = 0; @@ -1173,13 +1182,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi else break; - SLJIT_ASSERT(reg_map[tmp] <= 7); - if (offset < 4 * sizeof(sljit_sw)) - FAIL_IF(push_inst16(compiler, MOV | RD3(tmp) | (offset << 1))); - else + FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1))); + else if (reg_map[tmp] <= 7) FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2))); + else + FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP) + | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw))))); break; } @@ -1293,7 +1303,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)) size += SSIZE_OF(sw); @@ -1325,6 +1335,7 @@ static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm) static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size) { sljit_s32 local_size, fscratches, fsaveds, i, tmp; + sljit_s32 saveds_restore_start = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); sljit_s32 lr_dst = TMP_PC; sljit_uw reg_list; @@ -1358,8 +1369,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit reg_list = 0; tmp = SLJIT_S0 - compiler->saveds; - for (i = SLJIT_S0; i > tmp; i--) - reg_list |= (sljit_uw)1 << reg_map[i]; + if (saveds_restore_start != tmp) { + for (i = 
saveds_restore_start; i > tmp; i--) + reg_list |= (sljit_uw)1 << reg_map[i]; + } else + saveds_restore_start = 0; for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) reg_list |= (sljit_uw)1 << reg_map[i]; @@ -1379,9 +1393,9 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit if (reg_list == 0) return SLJIT_SUCCESS; - if (compiler->saveds > 0) { - SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_S0])); - lr_dst = SLJIT_S0; + if (saveds_restore_start != 0) { + SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[saveds_restore_start])); + lr_dst = saveds_restore_start; } else { SLJIT_ASSERT(reg_list == ((sljit_uw)1 << reg_map[SLJIT_FIRST_SAVED_REG])); lr_dst = SLJIT_FIRST_SAVED_REG; @@ -1685,10 +1699,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } @@ -1955,8 +1966,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw); } -#undef FPU_LOAD - /* --------------------------------------------------------------------- */ /* Other instructions */ /* --------------------------------------------------------------------- */ @@ -1984,11 +1993,15 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ return 0x0; case SLJIT_NOT_EQUAL: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. 
*/ return 0x1; case SLJIT_CARRY: @@ -1997,7 +2010,6 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_LESS: - case SLJIT_LESS_F64: return 0x3; case SLJIT_NOT_CARRY: @@ -2006,27 +2018,33 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: return 0x2; case SLJIT_GREATER: - case SLJIT_GREATER_F64: + case SLJIT_UNORDERED_OR_GREATER: return 0x8; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return 0x9; case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: return 0xb; case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: return 0xa; case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: return 0xc; case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: return 0xd; case SLJIT_OVERFLOW: @@ -2034,7 +2052,7 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x1; /* fallthrough */ - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return 0x6; case SLJIT_NOT_OVERFLOW: @@ -2042,9 +2060,16 @@ static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x0; /* fallthrough */ - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return 0x7; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x4; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x5; + default: /* SLJIT_JUMP */ SLJIT_UNREACHABLE(); return 0xe; @@ -2289,52 +2314,49 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #ifdef __SOFTFP__ - PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); - SLJIT_ASSERT((extra_space & 0x7) == 0); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space)); + 
SLJIT_ASSERT((extra_space & 0x7) == 0); - if ((type & SLJIT_CALL_RETURN) && extra_space == 0) - type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); + jump = sljit_emit_jump(compiler, type); + PTR_FAIL_IF(jump == NULL); - jump = sljit_emit_jump(compiler, type); - PTR_FAIL_IF(jump == NULL); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); - if (extra_space > 0) { - if (type & SLJIT_CALL_RETURN) - PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) - | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); - PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); - - if (type & SLJIT_CALL_RETURN) { - PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); - return jump; + if (type & SLJIT_CALL_RETURN) { + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2))); + return jump; + } } - } - SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); - PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); - return jump; -#else + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types)); + return jump; + } +#endif /* __SOFTFP__ */ + if (type & SLJIT_CALL_RETURN) { /* ldmia sp!, {..., lr} */ PTR_FAIL_IF(emit_stack_frame_release(compiler, -1)); type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } - PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif +#ifndef __SOFTFP__ + if ((type & 0xff) != 
SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); -#endif } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) @@ -2391,48 +2413,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi } #ifdef __SOFTFP__ - FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); - SLJIT_ASSERT((extra_space & 0x7) == 0); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space)); + SLJIT_ASSERT((extra_space & 0x7) == 0); - if ((type & SLJIT_CALL_RETURN) && extra_space == 0) - type = SLJIT_JUMP; + if ((type & SLJIT_CALL_RETURN) && extra_space == 0) + type = SLJIT_JUMP; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); + FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); + if (extra_space > 0) { + if (type & SLJIT_CALL_RETURN) + FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) + | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); - if (extra_space > 0) { - if (type & SLJIT_CALL_RETURN) - FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2) - | RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw)))); + FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); - FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2))); + if (type & SLJIT_CALL_RETURN) + return push_inst16(compiler, BX | RN3(TMP_REG2)); + } - if (type & SLJIT_CALL_RETURN) - return push_inst16(compiler, BX | RN3(TMP_REG2)); + SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); + return softfloat_post_call_with_args(compiler, arg_types); } +#endif /* __SOFTFP__ */ - SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN)); - return 
softfloat_post_call_with_args(compiler, arg_types); -#else /* !__SOFTFP__ */ if (type & SLJIT_CALL_RETURN) { /* ldmia sp!, {..., lr} */ FAIL_IF(emit_stack_frame_release(compiler, -1)); type = SLJIT_JUMP; } - FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif +#ifndef __SOFTFP__ + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(hardfloat_call_with_args(compiler, arg_types)); +#endif /* !__SOFTFP__ */ + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); -#endif /* __SOFTFP__ */ } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, @@ -2447,7 +2466,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); op = GET_OPCODE(op); - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1; if (op < SLJIT_ADD) { @@ -2499,7 +2518,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_32; - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); if (!(src & SLJIT_IMM)) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); @@ -2546,6 +2565,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255)) return SLJIT_ERR_UNSUPPORTED; @@ -2594,6 +2616,109 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw); } +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + sljit_uw imm; + + *mem = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + *memw = 0; + return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6)); + } + + arg &= REG_MASK; + + if (arg) { + if (argw <= max_offset && argw >= -0xff) { + *mem = arg; + return SLJIT_SUCCESS; + } + + if (argw < 0) { + imm = get_imm((sljit_uw)(-argw & ~0xff)); + + if (imm) { + *memw = -(-argw & 0xff); + return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } else if ((argw & 0xfff) <= max_offset) { + imm = get_imm((sljit_uw)(argw & ~0xfff)); + + if (imm) { + *memw = argw & 0xfff; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } else { + imm = get_imm((sljit_uw)((argw | 0xfff) + 1)); + + if (imm) { + *memw = (argw & 0xfff) - 0x1000; + return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm); + } + } + } + + imm = 
(sljit_uw)(argw & ~0xfff); + + if ((argw & 0xfff) > max_offset) { + imm += 0x1000; + *memw = (argw & 0xfff) - 0x1000; + } else + *memw = argw & 0xfff; + + FAIL_IF(load_immediate(compiler, TMP_REG1, imm)); + + if (arg == 0) + return SLJIT_SUCCESS; + + return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_MEM_ALIGNED_32) + return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | RT4(TMP_REG2))); + + if (type & SLJIT_32) + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1); + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | DN4(freg) | 0x80 | RT4(TMP_REG2))); + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1); + } + + if (type & SLJIT_32) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + return push_inst32(compiler, VMOV | DN4(freg) | RT4(TMP_REG2)); + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4)); + mem |= SLJIT_MEM; + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1)); + return push_inst32(compiler, VMOV2 | DM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1)); +} + +#undef FPU_LOAD + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, 
sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c index 1a06b17..ca9dbd0 100644 --- a/src/sljit/sljitNativeMIPS_32.c +++ b/src/sljit/sljitNativeMIPS_32.c @@ -38,383 +38,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; } -#define EMIT_LOGICAL(op_imm, op_norm) \ - if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ - } \ - else { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ - } - -#define EMIT_SHIFT(op_imm, op_v) \ - if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ - } \ - else { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ - } - -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_sw src2) -{ - sljit_s32 is_overflow, is_carry, is_handled; - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (dst != src2) - return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); - return SLJIT_SUCCESS; - - 
case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); - return SLJIT_SUCCESS; - - case SLJIT_CLZ: - 
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); -#else /* SLJIT_MIPS_REV < 1 */ - if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { - FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); - return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); - } - /* Nearly all instructions are unmovable in the following sequence. */ - FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - /* Check zero. */ - FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst))); - /* Loop for searching the highest bit. 
*/ - FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst))); - FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); -#endif /* SLJIT_MIPS_REV >= 1 */ - return SLJIT_SUCCESS; - - case SLJIT_ADD: - is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - - if (is_overflow || is_carry) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - } - } - /* dst may be the same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); - } - else { - if (is_overflow) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - /* dst may be the same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); - } - - /* a + b >= a | b (otherwise, the carry should be set to 1). 
*/ - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (!is_overflow) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); - return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); - - case SLJIT_ADDC: - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_carry) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - } - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); - } else { - if (is_carry) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); - } - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); - if (!is_carry) - return SLJIT_SUCCESS; - - /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - /* Set carry flag. 
*/ - return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); - - case SLJIT_SUB: - if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - is_handled = 0; - - if (flags & SRC2_IMM) { - if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - is_handled = 1; - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - is_handled = 1; - } - } - - if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - is_handled = 1; - - if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) - { - FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) - { - FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); - } - } - - if (is_handled) { - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - return 
push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)); - } - else { - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)); - } - return SLJIT_SUCCESS; - } - - is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - /* dst may be the same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (is_overflow) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - /* dst may be the same as src1 or src2. 
*/ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (!is_overflow) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); - return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); - - case SLJIT_SUBC: - if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_carry) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); - - FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); - return (is_carry) ? 
push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; - - case SLJIT_MUL: - SLJIT_ASSERT(!(flags & SRC2_IMM)); - - if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - return push_inst(compiler, MFLO | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ - } - -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) - FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); -#else /* SLJIT_MIPS_REV < 6 */ - FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); -#endif /* SLJIT_MIPS_REV >= 6 */ - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); - return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); - - case SLJIT_AND: - EMIT_LOGICAL(ANDI, AND); - return SLJIT_SUCCESS; - - case SLJIT_OR: - EMIT_LOGICAL(ORI, OR); - return SLJIT_SUCCESS; - - case SLJIT_XOR: - EMIT_LOGICAL(XORI, XOR); - return SLJIT_SUCCESS; - - case SLJIT_SHL: - EMIT_SHIFT(SLL, SLLV); - return SLJIT_SUCCESS; - - case SLJIT_LSHR: - EMIT_SHIFT(SRL, SRLV); - return SLJIT_SUCCESS; - - case SLJIT_ASHR: - EMIT_SHIFT(SRA, SRAV); - return SLJIT_SUCCESS; - } - - SLJIT_UNREACHABLE(); - return SLJIT_SUCCESS; -} - static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); @@ -573,8 +196,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile sljit_s32 arg_types) { struct sljit_jump 
*jump; - sljit_u32 extra_space = (sljit_u32)type; - sljit_ins ins; + sljit_u32 extra_space = 0; + sljit_ins ins = NOP; CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -583,14 +206,23 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile PTR_FAIL_IF(!jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) { + extra_space = (sljit_u32)type; + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); + } else if (type & SLJIT_CALL_RETURN) + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) { - jump->flags |= IS_JAL | IS_CALL; + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); } else PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); @@ -598,6 +230,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS)); + /* Maximum number of instructions required for generating a constant. 
*/ + compiler->size += 2; + if (extra_space == 0) return jump; @@ -623,16 +258,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); if (src & SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); - else if (FAST_IS_REG(src)) + else if (src != PIC_ADDR_REG) FAIL_IF(push_inst(compiler, ADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); - else if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); - } FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space)); diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c index c2b3d83..443bade 100644 --- a/src/sljit/sljitNativeMIPS_64.c +++ b/src/sljit/sljitNativeMIPS_64.c @@ -118,421 +118,6 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); } -#define SELECT_OP(a, b) \ - (!(op & SLJIT_32) ? 
a : b) - -#define EMIT_LOGICAL(op_imm, op_norm) \ - if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ - } \ - else { \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ - } - -#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ - if (flags & SRC2_IMM) { \ - if (src2 >= 32) { \ - SLJIT_ASSERT(!(op & SLJIT_32)); \ - ins = op_dimm32; \ - src2 -= 32; \ - } \ - else \ - ins = (op & SLJIT_32) ? op_imm : op_dimm; \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ - } \ - else { \ - ins = (op & SLJIT_32) ? 
op_v : op_dv; \ - if (op & SLJIT_SET_Z) \ - FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (!(flags & UNUSED_DEST)) \ - FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ - } - -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_sw src2) -{ - sljit_ins ins; - sljit_s32 is_overflow, is_carry, is_handled; - - switch (GET_OPCODE(op)) { - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (dst != src2) - return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_32) - return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) - return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { -#if (defined SLJIT_MIPS_REV && 
SLJIT_MIPS_REV >= 1) - if (op & SLJIT_32) - return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 1 */ - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); - return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) - if (dst == src2) - return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 2 */ - FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); - return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_S32: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); - return SLJIT_SUCCESS; - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); -#else /* SLJIT_MIPS_REV < 1 */ - if 
(SLJIT_UNLIKELY(flags & UNUSED_DEST)) { - FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); - return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); - } - /* Nearly all instructions are unmovable in the following sequence. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - /* Check zero. */ - FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); - /* Loop for searching the highest bit. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); - FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); -#endif /* SLJIT_MIPS_REV >= 1 */ - return SLJIT_SUCCESS; - - case SLJIT_ADD: - is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - - if (is_overflow || is_carry) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - } - } - /* dst may be the 
same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); - } - else { - if (is_overflow) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - /* dst may be the same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); - } - - /* a + b >= a | b (otherwise, the carry should be set to 1). */ - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (!is_overflow) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); - return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); - - case SLJIT_ADDC: - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_carry) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - } - 
FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); - } else { - if (is_carry) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); - } - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); - if (!is_carry) - return SLJIT_SUCCESS; - - /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - /* Set carry flag. */ - return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); - - case SLJIT_SUB: - if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - is_handled = 0; - - if (flags & SRC2_IMM) { - if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - is_handled = 1; - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - is_handled = 1; - } - } - - if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { - is_handled = 1; - - if (flags & SRC2_IMM) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else 
if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) - { - FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { - FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - } - else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) - { - FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); - } - } - - if (is_handled) { - if (flags & SRC2_IMM) { - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); - } - else { - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (!(flags & UNUSED_DEST)) - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); - } - return SLJIT_SUCCESS; - } - - is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_overflow) { - if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); - } - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); - /* dst may be the same as src1 or src2. 
*/ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (is_overflow) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - else if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - - if (is_overflow || is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); - /* dst may be the same as src1 or src2. */ - if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (!is_overflow) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); - if (op & SLJIT_SET_Z) - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); - return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); - - case SLJIT_SUBC: - if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); - src2 = TMP_REG2; - flags &= ~SRC2_IMM; - } - - is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); - - if (flags & SRC2_IMM) { - if (is_carry) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. 
*/ - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); - } - else { - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - /* dst may be the same as src1 or src2. */ - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); - } - - if (is_carry) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); - - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); - return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; - - case SLJIT_MUL: - SLJIT_ASSERT(!(flags & SRC2_IMM)); - - if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) - return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); -#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) - if (op & SLJIT_32) - return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); - FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); - return push_inst(compiler, MFLO | D(dst), DR(dst)); -#else /* SLJIT_MIPS_REV < 1 */ - FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - return push_inst(compiler, MFLO | D(dst), DR(dst)); -#endif /* SLJIT_MIPS_REV >= 6 */ - } - -#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) - FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); -#else /* SLJIT_MIPS_REV < 6 */ - FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); -#endif /* SLJIT_MIPS_REV >= 6 */ - FAIL_IF(push_inst(compiler, 
SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); - - case SLJIT_AND: - EMIT_LOGICAL(ANDI, AND); - return SLJIT_SUCCESS; - - case SLJIT_OR: - EMIT_LOGICAL(ORI, OR); - return SLJIT_SUCCESS; - - case SLJIT_XOR: - EMIT_LOGICAL(XORI, XOR); - return SLJIT_SUCCESS; - - case SLJIT_SHL: - EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); - return SLJIT_SUCCESS; - - case SLJIT_LSHR: - EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); - return SLJIT_SUCCESS; - - case SLJIT_ASHR: - EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); - return SLJIT_SUCCESS; - } - - SLJIT_UNREACHABLE(); - return SLJIT_SUCCESS; -} - static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value) { FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); @@ -653,14 +238,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile if (type & SLJIT_CALL_RETURN) PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); - PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins)); SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); - PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0)); + if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; if (!(type & SLJIT_CALL_RETURN)) { - jump->flags |= IS_JAL | IS_CALL; + jump->flags |= IS_JAL; + + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + jump->flags |= IS_CALL; + PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); } else PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS)); @@ -668,6 +259,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, ins, 
UNMOVABLE_INS)); + /* Maximum number of instructions required for generating a constant. */ + compiler->size += 6; return jump; } @@ -680,16 +273,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); + src = PIC_ADDR_REG; + srcw = 0; + } + + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); + src = PIC_ADDR_REG; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); + + if (ins != NOP) + FAIL_IF(push_inst(compiler, ins, MOVABLE_INS)); + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); + } + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); if (src & SLJIT_IMM) FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); - else if (FAST_IS_REG(src)) + else if (src != PIC_ADDR_REG) FAIL_IF(push_inst(compiler, DADDU | S(src) | TA(0) | D(PIC_ADDR_REG), DR(PIC_ADDR_REG))); - else if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw)); - } if (type & SLJIT_CALL_RETURN) FAIL_IF(emit_stack_frame_release(compiler, 0, &ins)); diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c index be5cb22..928e111 100644 --- a/src/sljit/sljitNativeMIPS_common.c +++ b/src/sljit/sljitNativeMIPS_common.c @@ -151,12 +151,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define BREAK (HI(0) | LO(13)) #define CFC1 (HI(17) | (2 << 21)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) +#define C_EQ_S (HI(17) | CMP_FMT_S | LO(2)) +#define C_OLE_S 
(HI(17) | CMP_FMT_S | LO(6)) +#define C_OLT_S (HI(17) | CMP_FMT_S | LO(4)) #define C_UEQ_S (HI(17) | CMP_FMT_S | LO(3)) #define C_ULE_S (HI(17) | CMP_FMT_S | LO(7)) #define C_ULT_S (HI(17) | CMP_FMT_S | LO(5)) #define C_UN_S (HI(17) | CMP_FMT_S | LO(1)) #define C_FD (FD(TMP_FREG3)) #else /* SLJIT_MIPS_REV < 6 */ +#define C_EQ_S (HI(17) | FMT_S | LO(50)) +#define C_OLE_S (HI(17) | FMT_S | LO(54)) +#define C_OLT_S (HI(17) | FMT_S | LO(52)) #define C_UEQ_S (HI(17) | FMT_S | LO(51)) #define C_ULE_S (HI(17) | FMT_S | LO(55)) #define C_ULT_S (HI(17) | FMT_S | LO(53)) @@ -206,9 +212,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define JR (HI(0) | LO(8)) #endif /* SLJIT_MIPS_REV >= 6 */ #define LD (HI(55)) +#define LDL (HI(26)) +#define LDR (HI(27)) #define LDC1 (HI(53)) #define LUI (HI(15)) #define LW (HI(35)) +#define LWL (HI(34)) +#define LWR (HI(38)) #define LWC1 (HI(49)) #define MFC1 (HI(17)) #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) @@ -236,6 +246,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define OR (HI(0) | LO(37)) #define ORI (HI(13)) #define SD (HI(63)) +#define SDL (HI(44)) +#define SDR (HI(45)) #define SDC1 (HI(61)) #define SLT (HI(0) | LO(42)) #define SLTI (HI(10)) @@ -250,6 +262,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define SUB_S (HI(17) | FMT_S | LO(1)) #define SUBU (HI(0) | LO(35)) #define SW (HI(43)) +#define SWL (HI(42)) +#define SWR (HI(46)) #define SWC1 (HI(57)) #define TRUNC_W_S (HI(17) | FMT_S | LO(13)) #define XOR (HI(0) | LO(38)) @@ -277,11 +291,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = { #define ADDU_W ADDU #define ADDIU_W ADDIU #define SLL_W SLL +#define SRA_W SRA #define SUBU_W SUBU #else #define ADDU_W DADDU #define ADDIU_W DADDIU #define SLL_W DSLL +#define SRA_W DSRA #define SUBU_W DSUBU #endif @@ -315,19 +331,21 @@ static SLJIT_INLINE sljit_ins invert_branch(sljit_uw flags) return (1 << 16); } 
-static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; sljit_ins *inst; sljit_ins saved_inst; + inst = (sljit_ins *)jump->addr; + #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) - return code_ptr; + goto exit; #else if (jump->flags & SLJIT_REWRITABLE_JUMP) - return code_ptr; + goto exit; #endif if (jump->flags & JUMP_ADDR) @@ -337,13 +355,12 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - inst = (sljit_ins *)jump->addr; if (jump->flags & IS_COND) inst--; #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (jump->flags & IS_CALL) - goto keep_address; + goto preserve_addr; #endif /* B instructions. */ @@ -364,15 +381,14 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i jump->addr -= 2 * sizeof(sljit_ins); return inst; } - } - else { + } else { diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; if (diff <= SIMM_MAX && diff >= SIMM_MIN) { jump->flags |= PATCH_B; if (!(jump->flags & IS_COND)) { inst[0] = (jump->flags & IS_JAL) ? BAL : B; - inst[1] = NOP; + /* Keep inst[1] */ return inst + 1; } inst[0] ^= invert_branch(jump->flags); @@ -415,36 +431,46 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if ((target_addr & ~(sljit_uw)0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~(sljit_uw)0xfffffff)) { jump->flags |= PATCH_J; inst[0] = (jump->flags & IS_JAL) ? 
JAL : J; - inst[1] = NOP; + /* Keep inst[1] */ return inst + 1; } } + if (jump->flags & IS_COND) + inst++; + #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) -keep_address: +preserve_addr: if (target_addr <= 0x7fffffff) { jump->flags |= PATCH_ABS32; - if (jump->flags & IS_COND) { - inst[0] -= 4; - inst++; - } - inst[2] = inst[6]; - inst[3] = inst[7]; + if (jump->flags & IS_COND) + inst[-1] -= 4; + + inst[2] = inst[0]; + inst[3] = inst[1]; return inst + 3; } if (target_addr <= 0x7fffffffffffl) { jump->flags |= PATCH_ABS48; - if (jump->flags & IS_COND) { - inst[0] -= 2; - inst++; - } - inst[4] = inst[6]; - inst[5] = inst[7]; + if (jump->flags & IS_COND) + inst[-1] -= 2; + + inst[4] = inst[0]; + inst[5] = inst[1]; return inst + 5; } #endif - return code_ptr; +exit: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + inst[2] = inst[0]; + inst[3] = inst[1]; + return inst + 3; +#else + inst[6] = inst[0]; + inst[7] = inst[1]; + return inst + 7; +#endif } #ifdef __GNUC__ @@ -459,30 +485,52 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) { if (max_label < 0x80000000l) { - put_label->flags = 0; + put_label->flags = PATCH_ABS32; return 1; } if (max_label < 0x800000000000l) { - put_label->flags = 1; + put_label->flags = PATCH_ABS48; return 3; } - put_label->flags = 2; + put_label->flags = 0; return 5; } -static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) -{ - sljit_uw addr = put_label->label->addr; - sljit_ins *inst = (sljit_ins *)put_label->addr; - sljit_u32 reg = *inst; +#endif /* SLJIT_CONFIG_MIPS_64 */ - if (put_label->flags == 0) { +static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +{ + struct sljit_jump *jump; + struct sljit_put_label *put_label; + sljit_uw flags; + sljit_ins *inst; + sljit_uw addr; + + if (reg != 0) { + jump = (struct sljit_jump*)dst; + flags = 
jump->flags; + inst = (sljit_ins*)jump->addr; + addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + } else { + put_label = (struct sljit_put_label*)dst; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + flags = put_label->flags; +#endif + inst = (sljit_ins*)put_label->addr; + addr = put_label->label->addr; + reg = *inst; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + inst[0] = LUI | T(reg) | IMM(addr >> 16); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr < 0x80000000l); inst[0] = LUI | T(reg) | IMM(addr >> 16); } - else if (put_label->flags == 1) { + else if (flags & PATCH_ABS48) { SLJIT_ASSERT(addr < 0x800000000000l); inst[0] = LUI | T(reg) | IMM(addr >> 32); inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff); @@ -497,12 +545,11 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16); inst += 4; } +#endif /* SLJIT_CONFIG_MIPS_32 */ inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff); } -#endif - SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { struct sljit_memory_fragment *buf; @@ -557,11 +604,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } if (jump && jump->addr == word_count) { #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - jump->addr = (sljit_uw)(code_ptr - 3); + word_count += 2; #else - jump->addr = (sljit_uw)(code_ptr - 7); + word_count += 6; #endif - code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); + jump->addr = (sljit_uw)(code_ptr - 1); + code_ptr = detect_jump_type(jump, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -571,7 +619,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil if (put_label && put_label->addr == word_count) { SLJIT_ASSERT(put_label->label); put_label->addr = (sljit_uw)code_ptr; -#if 
(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + code_ptr += 1; + word_count += 1; +#else code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); word_count += 5; #endif @@ -579,8 +630,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } next_addr = compute_next_addr(label, jump, const_, put_label); } - code_ptr ++; - word_count ++; + code_ptr++; + word_count++; } while (buf_ptr < buf_end); buf = buf->next; @@ -617,51 +668,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } - /* Set the fields of immediate loads. */ -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; -#else - if (jump->flags & PATCH_ABS32) { - SLJIT_ASSERT(addr <= 0x7fffffff); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[1] |= (sljit_ins)addr & 0xffff; - break; - } - - if (jump->flags & PATCH_ABS48) { - SLJIT_ASSERT(addr <= 0x7fffffffffffl); - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[3] |= (sljit_ins)addr & 0xffff; - break; - } - - SLJIT_ASSERT(((buf_ptr[0] | buf_ptr[1] | buf_ptr[3] | buf_ptr[5]) & 0xffff) == 0); - buf_ptr[0] |= (sljit_ins)(addr >> 48) & 0xffff; - buf_ptr[1] |= (sljit_ins)(addr >> 32) & 0xffff; - buf_ptr[3] |= (sljit_ins)(addr >> 16) & 0xffff; - buf_ptr[5] |= (sljit_ins)addr & 0xffff; -#endif + load_addr_to_reg(jump, PIC_ADDR_REG); } while (0); jump = jump->next; } put_label = compiler->put_labels; while (put_label) { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - addr = 
put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; - - SLJIT_ASSERT((buf_ptr[0] & 0xffe00000) == LUI && (buf_ptr[1] & 0xfc000000) == ORI); - buf_ptr[0] |= (addr >> 16) & 0xffff; - buf_ptr[1] |= addr & 0xffff; -#else - put_label_set(put_label); -#endif + load_addr_to_reg(put_label, 0); put_label = put_label->next; } @@ -713,6 +727,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) } } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -770,13 +789,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_ins base; sljit_s32 i, tmp, offset; - sljit_s32 arg_count, word_arg_count, saved_arg_count, float_arg_count; + sljit_s32 arg_count, word_arg_count, float_arg_count; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { if ((local_size & SSIZE_OF(sw)) != 0) @@ -791,27 +811,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif compiler->local_size = local_size; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - tmp = arg_types >> SLJIT_ARG_SHIFT; - arg_count = 0; offset = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (!(options & 
SLJIT_ENTER_REG_ARG)) { + tmp = arg_types >> SLJIT_ARG_SHIFT; + arg_count = 0; - while (tmp) { - offset = arg_count; - if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { - if ((arg_count & 0x1) != 0) + while (tmp) { + offset = arg_count; + if ((tmp & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) { + if ((arg_count & 0x1) != 0) + arg_count++; arg_count++; + } + arg_count++; + tmp >>= SLJIT_ARG_SHIFT; } - arg_count++; - tmp >>= SLJIT_ARG_SHIFT; + compiler->args_size = (sljit_uw)arg_count << 2; + offset = (offset >= 4) ? (offset << 2) : 0; } - - compiler->args_size = (sljit_uw)arg_count << 2; - offset = (offset >= 4) ? (offset << 2) : 0; -#else /* !SLJIT_CONFIG_MIPS_32 */ - offset = 0; #endif /* SLJIT_CONFIG_MIPS_32 */ if (local_size + offset <= -SIMM_MIN) { @@ -820,9 +840,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi base = S(SLJIT_SP); offset = local_size - SSIZE_OF(sw); } else { - FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size)); + FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size)); FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP))); base = S(TMP_REG2); offset = -SSIZE_OF(sw); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -833,7 +853,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offset), MOVABLE_INS)); tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { offset -= SSIZE_OF(sw); FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offset), MOVABLE_INS)); } @@ -860,10 +880,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, 
SDC1 | base | FT(i) | IMM(offset), MOVABLE_INS)); } + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + arg_types >>= SLJIT_ARG_SHIFT; arg_count = 0; word_arg_count = 0; - saved_arg_count = 0; float_arg_count = 0; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) @@ -970,7 +992,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { if ((local_size & SSIZE_OF(sw)) != 0) @@ -993,10 +1015,11 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit sljit_s32 saveds = compiler->saveds; sljit_s32 fsaveds = compiler->fsaveds; sljit_s32 fscratches = compiler->fscratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); local_size = compiler->local_size; - tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + tmp = GET_SAVED_REGISTERS_SIZE(scratches, saveds - kept_saveds_count, 1); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { if ((tmp & SSIZE_OF(sw)) != 0) @@ -1028,7 +1051,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | TA(RETURN_ADDR_REG) | IMM(offset), RETURN_ADDR_REG)); tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - kept_saveds_count; i > tmp; i--) { offset -= SSIZE_OF(sw); FAIL_IF(push_inst(compiler, STACK_LOAD | S(SLJIT_SP) | T(i) | IMM(offset), MOVABLE_INS)); } @@ 
-1134,9 +1157,10 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag return 0; } +#define TO_ARGW_HI(argw) (((argw) & ~0xffff) + (((argw) & 0x8000) ? 0x10000 : 0)) + /* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ + Note: can_cache is called only for binary operators. */ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); @@ -1151,7 +1175,8 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj } if (arg == next_arg) { - if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) return 1; return 0; } @@ -1163,6 +1188,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) { sljit_s32 tmp_ar, base, delay_slot; + sljit_sw offset, argw_hi; SLJIT_ASSERT(arg & SLJIT_MEM); if (!(next_arg & SLJIT_MEM)) { @@ -1170,6 +1196,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } + /* Since tmp can be the same as base or offset registers, + * these might be unavailable after modifying tmp. 
*/ if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { tmp_ar = reg_ar; delay_slot = reg_ar; @@ -1217,35 +1245,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } - if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { - if (argw != compiler->cache_argw) { - FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - compiler->cache_argw = argw; - } - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - } + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(argw - compiler->cache_argw), delay_slot); - if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { - if (argw != compiler->cache_argw) - FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - } - else { + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + offset = argw - compiler->cache_argw; + } else { compiler->cache_arg = SLJIT_MEM; - FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw_hi)); + compiler->cache_argw = argw_hi; + offset = argw & 0xffff; + argw = argw_hi; + } } - 
compiler->cache_argw = argw; if (!base) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { compiler->cache_arg = arg; FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar) | IMM(offset), delay_slot); } FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(offset), delay_slot); } static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw) @@ -1277,12 +1309,12 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } - FAIL_IF(load_immediate(compiler, tmp_ar, argw)); + FAIL_IF(load_immediate(compiler, tmp_ar, TO_ARGW_HI(argw))); if (base != 0) FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar) | IMM(argw), delay_slot); } static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) @@ -1292,6 +1324,497 @@ static 
SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, slji return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); } +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + +#define SELECT_OP(a, b) (b) + +#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +#else /* !SLJIT_CONFIG_MIPS_32 */ + +#define SELECT_OP(a, b) \ + (!(op & SLJIT_32) ? a : b) + +#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ + if (flags & SRC2_IMM) { \ + if (src2 >= 32) { \ + SLJIT_ASSERT(!(op & SLJIT_32)); \ + ins = op_dimm32; \ + src2 -= 32; \ + } \ + else \ + ins = (op & SLJIT_32) ? op_imm : op_dimm; \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + ins = (op & SLJIT_32) ? 
op_v : op_dv; \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +#endif /* SLJIT_CONFIG_MIPS_32 */ + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled; +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_ins ins; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | 
SH_IMM(24), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ +#else /* !SLJIT_CONFIG_MIPS_32 */ +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_32) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 1 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) + if (dst == src2) + return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + } + 
SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_MIPS_64 */ + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1) + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); +#else /* SLJIT_MIPS_REV < 1 */ + /* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. 
*/ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); +#endif /* SLJIT_MIPS_REV >= 1 */ + return SLJIT_SUCCESS; + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(OTHER_FLAG), OTHER_FLAG)); + carry_src_ar = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. 
*/ + if (is_overflow || carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(OTHER_FLAG), OTHER_FLAG)); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_ADDC: + carry_src_ar = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (carry_src_ar != 0) { + if (src1 != dst) + carry_src_ar = DR(src1); + else if (src2 != dst) + carry_src_ar = DR(src2); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + carry_src_ar = EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_ar != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTIU | S(dst) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(carry_src_ar) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (carry_src_ar == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + /* Set carry flag. 
*/ + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & 
SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + return push_inst(compiler, XOR | S(TMP_REG1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)); +#elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV 
>= 1) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (op & SLJIT_32) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_CONFIG_MIPS_32 */ +#else /* SLJIT_MIPS_REV < 1 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif /* SLJIT_MIPS_REV >= 6 */ + } + +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); +#else /* SLJIT_MIPS_REV < 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); +#endif /* SLJIT_MIPS_REV >= 6 */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); + return SLJIT_SUCCESS; + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#define CHECK_IMM(flags, srcw) \ + ((!((flags) & LOGICAL_OP) && 
((srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)) \ + || (((flags) & LOGICAL_OP) && !((srcw) & ~UIMM_MAX))) + static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, @@ -1325,25 +1848,18 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 flags |= SLOW_DEST; if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w) { - if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN)) - || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) { - flags |= SRC2_IMM; - src2_r = src2w; - } - } - if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { - if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) - || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { - flags |= SRC2_IMM; - src2_r = src1w; + if ((src2 & SLJIT_IMM) && src2w != 0 && CHECK_IMM(flags, src2w)) { + flags |= SRC2_IMM; + src2_r = src2w; + } else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && CHECK_IMM(flags, src1w)) { + flags |= SRC2_IMM; + src2_r = src1w; - /* And swap arguments. */ - src1 = src2; - src1w = src2w; - src2 = SLJIT_IMM; - /* src2w = src2_r unneeded. */ - } + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. 
*/ } } @@ -1429,6 +1945,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 return SLJIT_SUCCESS; } +#undef CHECK_IMM + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) @@ -1662,10 +2180,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); } @@ -1746,14 +2261,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); - if (FAST_IS_REG(dst)) - return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); + if (FAST_IS_REG(dst)) { + FAIL_IF(push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + return SLJIT_SUCCESS; + } /* Store the integer value from a VFP register. */ return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, FR(TMP_FREG1), dst, dstw, 0, 0); #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef is_long +# undef flags #endif } @@ -1769,19 +2289,25 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - if (FAST_IS_REG(src)) + if (FAST_IS_REG(src)) { FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); - else if (src & SLJIT_MEM) { +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + } else if (src & SLJIT_MEM) { /* Load the integer value into a VFP register. */ - FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem2(compiler, (flags ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, FR(TMP_FREG1), src, srcw, dst, dstw)); } else { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) srcw = (sljit_s32)srcw; #endif FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif } FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | ((((sljit_ins)op & SLJIT_32) ^ SLJIT_32) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); @@ -1812,20 +2338,38 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile } switch (GET_FLAG_TYPE(op)) { - case SLJIT_EQUAL_F64: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + inst = C_EQ_S; + break; + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: inst = C_UEQ_S; break; - case SLJIT_LESS_F64: - case SLJIT_GREATER_EQUAL_F64: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + inst = C_OLT_S; + break; + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: inst = C_ULT_S; break; - case SLJIT_GREATER_F64: - case SLJIT_LESS_EQUAL_F64: + case 
SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_LESS_EQUAL: inst = C_ULE_S; break; + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + inst = C_OLE_S; + break; default: - SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED_F64 || GET_FLAG_TYPE(op) == SLJIT_ORDERED_F64); + SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED || GET_FLAG_TYPE(op) == SLJIT_ORDERED); inst = C_UN_S; break; } @@ -1871,6 +2415,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); break; case SLJIT_CONV_F64_FROM_F32: + /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */ FAIL_IF(push_inst(compiler, CVT_S_S | (sljit_ins)((op & SLJIT_32) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); op ^= SLJIT_32; break; @@ -1959,6 +2504,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return SLJIT_SUCCESS; } +#undef FLOAT_DATA +#undef FMT + /* --------------------------------------------------------------------- */ /* Other instructions */ /* --------------------------------------------------------------------- */ @@ -2000,18 +2548,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi } #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -#define JUMP_LENGTH 4 +#define BRANCH_LENGTH 4 #else -#define JUMP_LENGTH 8 +#define BRANCH_LENGTH 8 #endif #define BR_Z(src) \ - inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \ + inst = BEQ | SA(src) | TA(0) | BRANCH_LENGTH; \ flags = IS_BIT26_COND; \ delay_check = src; #define BR_NZ(src) \ - inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \ + inst = BNE | SA(src) | TA(0) | BRANCH_LENGTH; \ flags = IS_BIT26_COND; \ delay_check = src; @@ -2029,11 +2577,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #else /* SLJIT_MIPS_REV < 6 */ #define BR_T() 
\ - inst = BC1T | JUMP_LENGTH; \ + inst = BC1T | BRANCH_LENGTH; \ flags = IS_BIT16_COND; \ delay_check = FCSR_FCC; #define BR_F() \ - inst = BC1F | JUMP_LENGTH; \ + inst = BC1F | BRANCH_LENGTH; \ flags = IS_BIT16_COND; \ delay_check = FCSR_FCC; @@ -2077,16 +2625,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile case SLJIT_NOT_CARRY: BR_NZ(OTHER_FLAG); break; - case SLJIT_NOT_EQUAL_F64: - case SLJIT_GREATER_EQUAL_F64: - case SLJIT_GREATER_F64: - case SLJIT_ORDERED_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: BR_T(); break; - case SLJIT_EQUAL_F64: - case SLJIT_LESS_F64: - case SLJIT_LESS_EQUAL_F64: - case SLJIT_UNORDERED_F64: + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: BR_F(); break; default: @@ -2102,8 +2662,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (inst) PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - if (type <= SLJIT_JUMP) PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); else { @@ -2113,6 +2671,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif return jump; } @@ -2151,11 +2716,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler compiler->cache_arg = 0; compiler->cache_argw = 0; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + flags = WORD_DATA | LOAD_DATA; +#else /* !SLJIT_CONFIG_MIPS_32 */ flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_MIPS_32 */ + if (src1 & SLJIT_MEM) { PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); src1 = TMP_REG1; } + if (src2 & SLJIT_MEM) { PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); src2 = TMP_REG2; @@ -2172,7 +2743,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler jump->flags |= IS_BIT26_COND; if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) jump->flags |= IS_MOVABLE; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? 
BNE : BEQ) | S(src1) | T(src2) | BRANCH_LENGTH, UNMOVABLE_INS)); } else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { inst = NOP; @@ -2219,7 +2790,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler break; } } - PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | BRANCH_LENGTH, UNMOVABLE_INS)); } else { if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { @@ -2244,20 +2815,26 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } jump->flags |= IS_BIT26_COND; - PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS)); } - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); jump->addr = compiler->size; PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif return jump; } #undef RESOLVE_IMM1 #undef RESOLVE_IMM2 -#undef JUMP_LENGTH +#undef BRANCH_LENGTH #undef BR_Z #undef BR_NZ #undef BR_T @@ -2283,7 +2860,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (compiler->delay_slot != UNMOVABLE_INS) jump->flags |= IS_MOVABLE; - FAIL_IF(emit_const(compiler, TMP_REG2, 0)); src = TMP_REG2; } else if (src & SLJIT_MEM) { @@ -2291,9 +2867,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi src = TMP_REG2; } - FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); - if (jump) + if (type <= SLJIT_JUMP) + FAIL_IF(push_inst(compiler, JR | S(src), UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, JALR | S(src) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + + if (jump != NULL) { jump->addr = compiler->size; + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 2; +#else + compiler->size += 6; +#endif + } FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); return SLJIT_SUCCESS; } @@ -2302,7 +2890,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_s32 src_ar, dst_ar; + sljit_s32 src_ar, dst_ar, invert; sljit_s32 saved_op = op; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) sljit_s32 mem_type = WORD_DATA; @@ -2323,32 +2911,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw)); - switch (type & 0xff) { - case SLJIT_EQUAL: - case SLJIT_NOT_EQUAL: - FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); - src_ar = dst_ar; - break; - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { - src_ar = OTHER_FLAG; + if (type < SLJIT_F_EQUAL) { + src_ar = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_ar = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; + invert ^= 0x1; break; } - FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); - src_ar = dst_ar; - type ^= 0x1; /* Flip type bit for the XORI below. */ - break; - case SLJIT_GREATER_F64: - case SLJIT_LESS_EQUAL_F64: - type ^= 0x1; /* Flip type bit for the XORI below. 
*/ - /* fallthrough */ - case SLJIT_EQUAL_F64: - case SLJIT_NOT_EQUAL_F64: - case SLJIT_LESS_F64: - case SLJIT_GREATER_EQUAL_F64: - case SLJIT_UNORDERED_F64: - case SLJIT_ORDERED_F64: + } else { + invert = 0; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: + invert = 1; + break; + } + #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar)); #else /* SLJIT_MIPS_REV < 6 */ @@ -2357,14 +2958,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar)); FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; - break; - - default: - src_ar = OTHER_FLAG; - break; } - if (type & 0x1) { + if (invert) { FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar)); src_ar = dst_ar; } @@ -2414,7 +3010,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil dst_reg &= ~SLJIT_32; - switch (type & 0xff) { + switch (type) { case SLJIT_EQUAL: ins = MOVZ | TA(EQUAL_FLAG); break; @@ -2435,16 +3031,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil case SLJIT_NOT_OVERFLOW: ins = MOVZ | TA(OTHER_FLAG); break; - case SLJIT_EQUAL_F64: - case SLJIT_LESS_F64: - case SLJIT_LESS_EQUAL_F64: - case SLJIT_UNORDERED_F64: + case SLJIT_F_EQUAL: + case SLJIT_F_LESS: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED: ins = MOVT; break; - 
case SLJIT_NOT_EQUAL_F64: - case SLJIT_GREATER_EQUAL_F64: - case SLJIT_GREATER_F64: - case SLJIT_ORDERED_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED: ins = MOVF; break; default: @@ -2460,6 +3068,265 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil #endif /* SLJIT_MIPS_REV >= 1 */ } +#if !(defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6) + +static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s16 max_offset) +{ + sljit_s32 arg = *mem; + sljit_sw argw = *memw; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG1) | SH_IMM(argw), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | S(arg & REG_MASK) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + } else + FAIL_IF(push_inst(compiler, ADDU_W | S(arg & REG_MASK) | T(OFFS_REG(arg)) | D(TMP_REG1), DR(TMP_REG1))); + + *mem = TMP_REG1; + *memw = 0; + + return SLJIT_SUCCESS; + } + + if (argw <= max_offset && argw >= SIMM_MIN) { + *mem = arg & REG_MASK; + return SLJIT_SUCCESS; + } + + *mem = TMP_REG1; + + if ((sljit_s16)argw > max_offset) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), argw)); + *memw = 0; + } else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), TO_ARGW_HI(argw))); + *memw = (sljit_s16)argw; + } + + if ((arg & REG_MASK) == 0) + return SLJIT_SUCCESS; + + return push_inst(compiler, ADDU_W | S(arg & REG_MASK) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1)); +} + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define MEM16_IMM_FIRST(memw) IMM((memw) + 1) +#define MEM16_IMM_SECOND(memw) IMM(memw) +#define MEMF64_FS_FIRST(freg) FS(freg) +#define 
MEMF64_FS_SECOND(freg) (FS(freg) | ((sljit_ins)1 << 11)) +#else /* !SLJIT_LITTLE_ENDIAN */ +#define MEM16_IMM_FIRST(memw) IMM(memw) +#define MEM16_IMM_SECOND(memw) IMM((memw) + 1) +#define MEMF64_FS_FIRST(freg) (FS(freg) | ((sljit_ins)1 << 11)) +#define MEMF64_FS_SECOND(freg) FS(freg) +#endif /* SLJIT_LITTLE_ENDIAN */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_s32 op = type & 0xff; + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + switch (op) { + case SLJIT_MOV_U8: + case SLJIT_MOV_S8: + flags = BYTE_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + if (op == SLJIT_MOV_S8) + flags |= SIGNED_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + + case SLJIT_MOV_U16: + case SLJIT_MOV_S16: + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 1)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SRA_W | T(reg) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), MOVABLE_INS)); + return push_inst(compiler, data_transfer_insts[BYTE_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), MOVABLE_INS); + } + + flags = BYTE_DATA | LOAD_DATA; + + if (op == SLJIT_MOV_S16) + flags |= SIGNED_DATA; + + FAIL_IF(push_inst(compiler, data_transfer_insts[flags] | S(mem) | T(TMP_REG2) | MEM16_IMM_FIRST(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, data_transfer_insts[BYTE_DATA | LOAD_DATA] | S(mem) | T(reg) | MEM16_IMM_SECOND(memw), DR(reg))); + FAIL_IF(push_inst(compiler, SLL_W | T(TMP_REG2) | D(TMP_REG2) | SH_IMM(8), DR(TMP_REG2))); + return push_inst(compiler, OR | S(reg) | T(TMP_REG2) | D(reg), DR(reg)); + + case SLJIT_MOV: + 
case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (type & SLJIT_MEM_ALIGNED_32) { + flags = WORD_DATA; + if (!(type & SLJIT_MEM_STORE)) + flags |= LOAD_DATA; + + return emit_op_mem(compiler, flags, DR(reg), mem, memw); + } +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 7)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(reg) | IMM(memw + 7), MOVABLE_INS); + } + + if (mem == reg) { + FAIL_IF(push_inst(compiler, DADDU | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(reg) | IMM(memw), DR(reg))); + return push_inst(compiler, LDR | S(mem) | T(reg) | IMM(memw + 7), DR(reg)); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - 3)); + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(reg) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(reg) | IMM(memw + 3), MOVABLE_INS); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (mem == reg) { + FAIL_IF(push_inst(compiler, ADDU | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg))); + return push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg)); + +#else /* !SLJIT_CONFIG_MIPS_32 */ + if (mem == reg) { + FAIL_IF(push_inst(compiler, DADDU | S(mem) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + mem = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(reg) | IMM(memw), DR(reg))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(reg) | IMM(memw + 3), DR(reg))); + + if (op == SLJIT_MOV_U32) { +#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2) 
+ return push_inst(compiler, DINSU | T(reg) | SA(0) | (31 << 11) | (0 << 11), DR(reg)); +#else /* SLJIT_MIPS_REV < 2 */ + FAIL_IF(push_inst(compiler, DSLL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg))); + return push_inst(compiler, DSRL32 | T(reg) | D(reg) | SH_IMM(0), DR(reg)); +#endif /* SLJIT_MIPS_REV >= 2 */ + } + + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_MIPS_32 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 mem, sljit_sw memw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + + if (type & (SLJIT_MEM_PRE | SLJIT_MEM_POST)) + return SLJIT_ERR_UNSUPPORTED; + + FAIL_IF(update_mem_addr(compiler, &mem, &memw, SIMM_MAX - ((type & SLJIT_32) ? 3 : 7))); /* The last access below is at memw + 3 (f32) or memw + 7 (f64). */ + SLJIT_ASSERT(FAST_IS_REG(mem) && mem != TMP_REG2); + + if (type & SLJIT_MEM_STORE) { + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS); + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), MOVABLE_INS)); + + FAIL_IF(push_inst(compiler, MFC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), 
MOVABLE_INS)); + return push_inst(compiler, SWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, MFC1 | (1 << 21) | T(TMP_REG2) | FS(freg), DR(TMP_REG2))); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, SDL | S(mem) | T(TMP_REG2) | IMM(memw), MOVABLE_INS)); + return push_inst(compiler, SDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), MOVABLE_INS); +#endif /* SLJIT_CONFIG_MIPS_32 */ + } + + if (type & SLJIT_32) { + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 3), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_FIRST(freg), MOVABLE_INS)); + + FAIL_IF(push_inst(compiler, LWL | S(mem) | T(TMP_REG2) | IMM(memw + 4), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LWR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, MTC1 | T(TMP_REG2) | MEMF64_FS_SECOND(freg), MOVABLE_INS)); +#if (!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif +#else /* !SLJIT_CONFIG_MIPS_32 */ + FAIL_IF(push_inst(compiler, LDL | S(mem) | T(TMP_REG2) | IMM(memw), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, LDR | S(mem) | T(TMP_REG2) | IMM(memw + 7), DR(TMP_REG2))); + + FAIL_IF(push_inst(compiler, MTC1 | (1 << 21) | T(TMP_REG2) | FS(freg), MOVABLE_INS)); +#if 
(!defined SLJIT_MIPS_REV || SLJIT_MIPS_REV <= 3) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif +#endif /* SLJIT_CONFIG_MIPS_32 */ + return SLJIT_SUCCESS; +} + +#undef MEM16_IMM_FIRST +#undef MEM16_IMM_SECOND +#undef MEMF64_FS_FIRST +#undef MEMF64_FS_SECOND + +#endif /* !SLJIT_MIPS_REV || SLJIT_MIPS_REV < 6 */ + +#undef TO_ARGW_HI + SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) { struct sljit_const *const_; @@ -2477,7 +3344,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); return const_; } @@ -2496,15 +3363,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj set_put_label(put_label, compiler, 0); dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); -#else PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS)); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->size += 1; +#else compiler->size += 5; #endif if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw)); return put_label; } diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c index 95fe6bb..1eb518a 100644 --- a/src/sljit/sljitNativePPC_32.c +++ b/src/sljit/sljitNativePPC_32.c @@ -277,8 +277,3 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) -{ - sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); -} diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c index d104f6d..6149101 100644 --- a/src/sljit/sljitNativePPC_64.c +++ b/src/sljit/sljitNativePPC_64.c @@ -502,8 +502,3 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); } - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) -{ - sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); -} diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c index 8bfdc69..719e772 100644 --- a/src/sljit/sljitNativePPC_common.c +++ b/src/sljit/sljitNativePPC_common.c @@ -368,7 +368,7 @@ static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label) else { inst[0] = ORIS | S(TMP_ZERO) | 
A(reg) | IMM(addr >> 48); inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff); - inst ++; + inst++; } inst[1] = RLDI(reg, reg, 32, 31, 1); @@ -497,8 +497,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } next_addr = compute_next_addr(label, jump, const_, put_label); } - code_ptr ++; - word_count ++; + code_ptr++; + word_count++; } while (buf_ptr < buf_end); buf = buf->next; @@ -649,6 +649,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) } } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -721,7 +726,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_s32 i, tmp, base, offset; sljit_s32 word_arg_count = 0; - sljit_s32 saved_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) sljit_s32 arg_count = 0; #endif @@ -730,8 +735,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 0) + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + + if (!(options & SLJIT_ENTER_REG_ARG)) + local_size += SSIZE_OF(sw); + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; compiler->local_size = local_size; @@ -770,11 +779,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler 
*compi FAIL_IF(push_inst(compiler, STFD | FS(i) | A(base) | IMM(offset))); } - offset -= SSIZE_OF(sw); - FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); + if (!(options & SLJIT_ENTER_REG_ARG)) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(base) | IMM(offset))); + } tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { offset -= SSIZE_OF(sw); FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(base) | IMM(offset))); } @@ -785,9 +796,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(base) | IMM(local_size + LR_SAVE_OFFSET))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { @@ -829,13 +845,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 0) + GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + + if (!(options & SLJIT_ENTER_REG_ARG)) + local_size += SSIZE_OF(sw); + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; return SLJIT_SUCCESS; } - static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { sljit_s32 i, tmp, base, offset; @@ -867,11 +886,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) FAIL_IF(push_inst(compiler, LFD | FS(i) | A(base) | 
IMM(offset))); } - offset -= SSIZE_OF(sw); - FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | S(TMP_ZERO) | A(base) | IMM(offset))); + } tmp = SLJIT_S0 - compiler->saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { offset -= SSIZE_OF(sw); FAIL_IF(push_inst(compiler, STACK_LOAD | S(i) | A(base) | IMM(offset))); } @@ -1626,7 +1647,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } - if (GET_OPCODE(op) != SLJIT_AND) { + if (!HAS_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { /* Unlike or and xor, the and resets unwanted bits as well. */ if (TEST_UI_IMM(src2, src2w)) { compiler->imm = (sljit_ins)src2w; @@ -1663,10 +1684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); } @@ -1818,6 +1836,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp if (src & SLJIT_IMM) { if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) srcw = (sljit_s32)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } @@ -1899,7 +1918,21 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile src2 = TMP_FREG2; } - return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); + FAIL_IF(push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2))); + + switch (GET_FLAG_TYPE(op)) { + case 
SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return push_inst(compiler, CROR | ((4 + 2) << 21) | ((4 + 2) << 16) | ((4 + 3) << 11)); + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + return push_inst(compiler, CROR | ((4 + 0) << 21) | ((4 + 0) << 16) | ((4 + 3) << 11)); + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + return push_inst(compiler, CROR | ((4 + 1) << 21) | ((4 + 1) << 16) | ((4 + 3) << 11)); + } + + return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -2076,38 +2109,50 @@ static sljit_ins get_bo_bi_flags(struct sljit_compiler *compiler, sljit_s32 type case SLJIT_SIG_LESS_EQUAL: return (4 << 21) | (1 << 16); - case SLJIT_LESS_F64: - return (12 << 21) | ((4 + 0) << 16); - - case SLJIT_GREATER_EQUAL_F64: - return (4 << 21) | ((4 + 0) << 16); - - case SLJIT_GREATER_F64: - return (12 << 21) | ((4 + 1) << 16); - - case SLJIT_LESS_EQUAL_F64: - return (4 << 21) | ((4 + 1) << 16); - case SLJIT_OVERFLOW: return (12 << 21) | (3 << 16); case SLJIT_NOT_OVERFLOW: return (4 << 21) | (3 << 16); - case SLJIT_EQUAL_F64: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: return (12 << 21) | ((4 + 2) << 16); - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: return (4 << 21) | ((4 + 2) << 16); - case 
SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return (12 << 21) | ((4 + 3) << 16); - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return (4 << 21) | ((4 + 3) << 16); default: - SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_CDECL); + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG); return (20 << 21); } } @@ -2154,7 +2199,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); #endif if (type & SLJIT_CALL_RETURN) { @@ -2162,11 +2208,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } @@ -2240,14 +2282,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi } #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - FAIL_IF(call_with_args(compiler, arg_types, &src)); -#endif - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(call_with_args(compiler, arg_types, &src)); #endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); } @@ -2279,7 +2318,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co bit = 0; from_xer = 0; - switch (type & 0xff) { + switch (type) { case SLJIT_LESS: case SLJIT_SIG_LESS: break; @@ -2332,38 +2371,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler 
*co invert = (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD) != 0; break; - case SLJIT_LESS_F64: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_LESS: bit = 4 + 0; break; - case SLJIT_GREATER_EQUAL_F64: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: bit = 4 + 0; invert = 1; break; - case SLJIT_GREATER_F64: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_GREATER: bit = 4 + 1; break; - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: bit = 4 + 1; invert = 1; break; - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: bit = 4 + 2; break; - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: bit = 4 + 2; invert = 1; break; - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: bit = 4 + 3; break; - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: bit = 4 + 3; invert = 1; break; @@ -2385,10 +2436,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return emit_op_mem(compiler, input_flags, reg, dst, dstw, TMP_REG1); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); + if (dst & SLJIT_MEM) return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0); return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0); @@ -2414,6 +2463,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + if (type & SLJIT_MEM_POST) return SLJIT_ERR_UNSUPPORTED; @@ -2510,6 +2562,9 @@ 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + if (type & SLJIT_MEM_UNALIGNED) + return sljit_emit_fmem_unaligned(compiler, type, freg, mem, memw); + if (type & SLJIT_MEM_POST) return SLJIT_ERR_UNSUPPORTED; @@ -2587,3 +2642,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj return put_label; } + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/src/sljit/sljitNativeRISCV_32.c b/src/sljit/sljitNativeRISCV_32.c new file mode 100644 index 0000000..24b8dc3 --- /dev/null +++ b/src/sljit/sljitNativeRISCV_32.c @@ -0,0 +1,72 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Loads the constant imm into dst_r (RISC-V 32 variant).
+   A value inside [SIMM_MIN, SIMM_MAX] needs a single ADDI from TMP_ZERO;
+   anything else is built from LUI (upper bits) plus an optional ADDI
+   (low 12 bits). tmp_r is not used by this variant.
+   Returns the result of the last push_inst() call. */
+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
+{
+	SLJIT_UNUSED_ARG(tmp_r);
+
+	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+		return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
+
+	/* ADDI sign extends its 12 bit immediate: when bit 11 is set the
+	   low part acts as a negative number, so the upper part is
+	   increased by 0x1000 to compensate. */
+	if (imm & 0x800)
+		imm += 0x1000;
+
+	FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
+
+	/* No ADDI is needed when the low 12 bits are zero. */
+	if ((imm & 0xfff) == 0)
+		return SLJIT_SUCCESS;
+
+	return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
+}
+
+/* Emits a two instruction sequence for init_value: LUI into dst,
+   followed by last_ins with dst as its first source register and the
+   low bits of init_value as its I-type immediate. Unlike
+   load_immediate() the LUI is emitted for every value.
+   NOTE(review): presumably the fixed length is what allows
+   sljit_set_jump_addr() to patch the sequence later - confirm. */
+static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
+{
+	/* Same sign extension compensation as in load_immediate(). */
+	if ((init_value & 0x800) != 0)
+		init_value += 0x1000;
+
+	FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
+	return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
+}
+
+/* Rewrites the immediates of the LUI + (ADDI or JALR) pair located at
+   addr so that the pair produces new_target.
+   NOTE(review): only inst[0] and inst[1] are modified here, while the
+   SLJIT_UPDATE_WX_FLAGS / SLJIT_CACHE_FLUSH ranges extend to inst + 5;
+   confirm whether the wider range is intentional on RISC-V 32. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
+{
+	sljit_ins *inst = (sljit_ins*)addr;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	if ((new_target & 0x800) != 0)
+		new_target += 0x1000;
+
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
+
+	SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
+	/* Keep bits 0-11 of the LUI and replace its upper 20 bit immediate. */
+	inst[0] = (inst[0] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
+	SLJIT_ASSERT((inst[1] & 0x707f) == ADDI || (inst[1] & 0x707f) == JALR);
+	/* Keep bits 0-19 and replace the 12 bit I-type immediate. */
+	inst[1] = (inst[1] & 0xfffff) | IMM_I(new_target);
+
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
+	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+	SLJIT_CACHE_FLUSH(inst, inst + 5);
+}
diff --git a/src/sljit/sljitNativeRISCV_64.c b/src/sljit/sljitNativeRISCV_64.c
new file mode 100644
index 0000000..16a5f5f
--- /dev/null
+++ b/src/sljit/sljitNativeRISCV_64.c
@@ -0,0 +1,181 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Loads the 64 bit constant imm into dst_r (RISC-V 64 variant).
+   The shortest sequence is selected from four cases:
+     - imm fits into 12 bits: a single ADDI from TMP_ZERO
+     - imm fits into 32 bits: LUI plus an optional ADDI / XORI
+     - imm fits into 44 bits: a 32 bit load of imm >> 12, SLLI by 12,
+       and an optional XORI for the low 12 bits
+     - otherwise: the upper half is built in tmp_r, the lower half in
+       dst_r, and the two are combined with SLLI + XOR
+   Only the last case writes tmp_r.
+   Returns the result of the last push_inst() call. */
+static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm, sljit_s32 tmp_r)
+{
+	sljit_sw high;
+
+	if (imm <= SIMM_MAX && imm >= SIMM_MIN)
+		return push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm));
+
+	if (imm <= 0x7fffffffl && imm >= S32_MIN) {
+		if (imm > S32_MAX) {
+			/* Values in (S32_MAX, 0x7fffffff] have bit 11 set, and rounding
+			   their upper part up would reach 0x80000000. Instead LUI
+			   loads 0x80000000 and XORI with the sign extended low 12
+			   bits flips the upper bits back. */
+			SLJIT_ASSERT((imm & 0x800) != 0);
+			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
+			return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
+		}
+
+		/* ADDI sign extends its immediate, compensate in the upper part. */
+		if ((imm & 0x800) != 0)
+			imm += 0x1000;
+
+		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
+
+		if ((imm & 0xfff) == 0)
+			return SLJIT_SUCCESS;
+
+		return push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
+	}
+
+	/* Trailing zeroes could be used to produce shifted immediates. */
+
+	if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) {
+		high = imm >> 12;
+
+		/* The final XORI flips every bit above bit 11 when bit 11 of
+		   imm is set, so the upper part is loaded inverted. */
+		if (imm & 0x800)
+			high = ~high;
+
+		if (high > S32_MAX) {
+			SLJIT_ASSERT((high & 0x800) != 0);
+			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
+			FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
+		} else {
+			if ((high & 0x800) != 0)
+				high += 0x1000;
+
+			FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff)));
+
+			if ((high & 0xfff) != 0)
+				FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high)));
+		}
+
+		FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12)));
+
+		if ((imm & 0xfff) != 0)
+			return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm));
+
+		return SLJIT_SUCCESS;
+	}
+
+	/* General case: split into an upper and a sign extended lower half. */
+	high = imm >> 32;
+	imm = (sljit_s32)imm;
+
+	/* The final XOR with a negative lower half flips the whole upper
+	   half, so the upper half is built inverted in that case. */
+	if ((imm & 0x80000000l) != 0)
+		high = ~high;
+
+	if (high <= 0x7ffff && high >= -0x80000) {
+		/* The upper half fits into the 20 bit LUI immediate. It is loaded
+		   already shifted by 12; high = 0x1000 records this, so no ADDI is
+		   emitted below and the final shift is 20 instead of 32. */
+		FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12)));
+		high = 0x1000;
+	} else {
+		if ((high & 0x800) != 0)
+			high += 0x1000;
+
+		FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high & ~0xfff)));
+		high &= 0xfff;
+	}
+
+	/* First instruction of the lower half. What is left in imm afterwards
+	   selects the follow-up below: 0 means none, bit 12 set means XORI,
+	   any other value means ADDI. */
+	if (imm <= SIMM_MAX && imm >= SIMM_MIN) {
+		FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(TMP_ZERO) | IMM_I(imm)));
+		imm = 0;
+	} else if (imm > S32_MAX) {
+		SLJIT_ASSERT((imm & 0x800) != 0);
+
+		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u));
+		imm = 0x1000 | (imm & 0xfff);
+	} else {
+		if ((imm & 0x800) != 0)
+			imm += 0x1000;
+
+		FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff)));
+		imm &= 0xfff;
+	}
+
+	if ((high & 0xfff) != 0)
+		FAIL_IF(push_inst(compiler, ADDI | RD(tmp_r) | RS1(tmp_r) | IMM_I(high)));
+
+	if (imm & 0x1000)
+		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
+	else if (imm != 0)
+		FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)));
+
+	FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(tmp_r) | IMM_I((high & 0x1000) ? 20 : 32)));
+	return push_inst(compiler, XOR | RD(dst_r) | RS1(dst_r) | RS2(tmp_r));
+}
+
+/* Emits a six instruction sequence for init_value:
+     [0] LUI  TMP_REG3   [1] ADDI TMP_REG3   (upper 32 bits)
+     [2] LUI  dst                            (bits 12-31)
+     [3] SLLI TMP_REG3, 32                   [4] XOR dst, dst, TMP_REG3
+     [5] last_ins with dst as its first source register and the low
+         12 bits as its I-type immediate
+   Every instruction is emitted for any init_value, and TMP_REG3 is
+   overwritten. */
+static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
+{
+	sljit_sw high;
+
+	/* Compensate for the sign extended 12 bit immediate of last_ins. */
+	if ((init_value & 0x800) != 0)
+		init_value += 0x1000;
+
+	high = init_value >> 32;
+
+	/* The XOR with a sign extended lower half inverts the upper half. */
+	if ((init_value & 0x80000000l) != 0)
+		high = ~high;
+
+	if ((high & 0x800) != 0)
+		high += 0x1000;
+
+	FAIL_IF(push_inst(compiler, LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff)));
+	FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high)));
+	FAIL_IF(push_inst(compiler, LUI | RD(dst) | (sljit_ins)(init_value & ~0xfff)));
+	FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(32)));
+	FAIL_IF(push_inst(compiler, XOR | RD(dst) | RS1(dst) | RS2(TMP_REG3)));
+	return push_inst(compiler, last_ins | RS1(dst) | IMM_I(init_value));
+}
+
+/* Rewrites the immediates of the six instruction sequence located at
+   addr (layout as produced by emit_const()) so that it produces
+   new_target. Only words 0, 1, 2 and 5 carry immediates and are patched.
+   NOTE(review): inst[5] is written, but the SLJIT_UPDATE_WX_FLAGS and
+   SLJIT_CACHE_FLUSH ranges end at inst + 5. If the end bound of these
+   macros is exclusive, the last word is not covered - confirm whether
+   inst + 6 is required. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
+{
+	sljit_ins *inst = (sljit_ins*)addr;
+	sljit_sw high;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	/* Same immediate preparation as in emit_const(). */
+	if ((new_target & 0x800) != 0)
+		new_target += 0x1000;
+
+	high = (sljit_sw)new_target >> 32;
+
+	if ((new_target & 0x80000000l) != 0)
+		high = ~high;
+
+	if ((high & 0x800) != 0)
+		high += 0x1000;
+
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
+
+	SLJIT_ASSERT((inst[0] & 0x7f) == LUI);
+	inst[0] = (inst[0] & 0xfff) | (sljit_ins)(high & ~0xfff);
+	SLJIT_ASSERT((inst[1] & 0x707f) == ADDI);
+	inst[1] = (inst[1] & 0xfffff) | IMM_I(high);
+	SLJIT_ASSERT((inst[2] & 0x7f) == LUI);
+	inst[2] = (inst[2] & 0xfff) | (sljit_ins)((sljit_sw)new_target & ~0xfff);
+	SLJIT_ASSERT((inst[5] & 0x707f) == ADDI || (inst[5] & 0x707f) == JALR);
+	inst[5] = (inst[5] & 0xfffff) | IMM_I(new_target);
+	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
+
+	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
+	SLJIT_CACHE_FLUSH(inst, inst + 5);
+}
diff --git a/src/sljit/sljitNativeRISCV_common.c b/src/sljit/sljitNativeRISCV_common.c
new file mode 100644
index 0000000..cc7d4a0
--- /dev/null
+++ b/src/sljit/sljitNativeRISCV_common.c
@@ -0,0 +1,2521 @@
+/*
+ * Stack-less Just-In-Time compiler
+ *
+ * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Returns the platform name string selected at compile time, with
+   SLJIT_CPUINFO appended by string literal concatenation. */
+SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
+{
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+	return "RISC-V-32" SLJIT_CPUINFO;
+#else /* !SLJIT_CONFIG_RISCV_32 */
+	return "RISC-V-64" SLJIT_CPUINFO;
+#endif /* SLJIT_CONFIG_RISCV_32 */
+}
+
+/* Length of an instruction word
+   Both for riscv-32 and riscv-64 */
+typedef sljit_u32 sljit_ins;
+
+/* Indices into reg_map[] of the registers reserved for the code
+   generator. TMP_ZERO is index 0, which reg_map[] maps to register 0. */
+#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
+#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
+#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
+#define TMP_ZERO	0
+
+/* Flags are kept in volatile registers. */
+#define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
+#define RETURN_ADDR_REG	TMP_REG2
+#define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
+
+/* Indices into freg_map[] of the temporary floating point registers. */
+#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+
+/* Maps a register index (the array index) to the register number that
+   RD() / RS1() / RS2() place into an instruction word. */
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
+	0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28
+};
+
+/* Same mapping for floating point registers, used by FRD() / FRS1() / FRS2(). */
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
+	0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1,
+};
+
+/* --------------------------------------------------------------------- */
+/*  Instruction forms                                                    */
+/* --------------------------------------------------------------------- */
+
+/* Register operand fields: destination at bit 7, first source at
+   bit 15, second source at bit 20. The argument is a register index,
+   which is translated through reg_map[] / freg_map[]. */
+#define RD(rd)		((sljit_ins)reg_map[rd] << 7)
+#define RS1(rs1)	((sljit_ins)reg_map[rs1] << 15)
+#define RS2(rs2)	((sljit_ins)reg_map[rs2] << 20)
+#define FRD(rd)		((sljit_ins)freg_map[rd] << 7)
+#define FRS1(rs1)	((sljit_ins)freg_map[rs1] << 15)
+#define FRS2(rs2)	((sljit_ins)freg_map[rs2] << 20)
+/* I-type immediate: placed at bit 20, bits above bit 11 of imm are
+   shifted out of the 32 bit instruction word. */
+#define IMM_I(imm)	((sljit_ins)(imm) << 20)
+/* S-type immediate: bits 5-11 go to bits 25-31, bits 0-4 to bits 7-11. */
+#define IMM_S(imm)	((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7))
+
+/* Represents funct(i) parts of the instructions. */
+#define OPC(o)		((sljit_ins)(o))
+#define F3(f)		((sljit_ins)(f) << 12)
+#define F12(f)		((sljit_ins)(f) << 20)
+#define F7(f)		((sljit_ins)(f) << 25)
+
+/* Instruction templates without operands; register and immediate
+   fields are or-ed in with the macros above. */
+#define ADD		(F7(0x0) | F3(0x0) | OPC(0x33))
+#define ADDI		(F3(0x0) | OPC(0x13))
+#define AND		(F7(0x0) | F3(0x7) | OPC(0x33))
+#define ANDI		(F3(0x7) | OPC(0x13))
+#define AUIPC		(OPC(0x17))
+#define BEQ		(F3(0x0) | OPC(0x63))
+#define BNE		(F3(0x1) | OPC(0x63))
+#define BLT		(F3(0x4) | OPC(0x63))
+#define BGE		(F3(0x5) | OPC(0x63))
+#define BLTU		(F3(0x6) | OPC(0x63))
+#define BGEU		(F3(0x7) | OPC(0x63))
+#define DIV		(F7(0x1) | F3(0x4) | OPC(0x33))
+#define DIVU		(F7(0x1) | F3(0x5) | OPC(0x33))
+#define EBREAK		(F12(0x1) | F3(0x0) | OPC(0x73))
+#define FADD_S		(F7(0x0) | F3(0x7) | OPC(0x53))
+#define FDIV_S		(F7(0xc) | F3(0x7) | OPC(0x53))
+#define FEQ_S		(F7(0x50) | F3(0x2) | OPC(0x53))
+#define FLD		(F3(0x3) | OPC(0x7))
+#define FLE_S		(F7(0x50) | F3(0x0) | OPC(0x53))
+#define FLT_S		(F7(0x50) | F3(0x1) | OPC(0x53))
+#define FSD		(F3(0x3) | OPC(0x27))
+/* These conversion opcodes are partly defined. */
+#define FCVT_S_D	(F7(0x20) | OPC(0x53))
+#define FCVT_S_W	(F7(0x68) | OPC(0x53))
+#define FCVT_W_S	(F7(0x60) | F3(0x1) | OPC(0x53))
+#define FMUL_S		(F7(0x8) | F3(0x7) | OPC(0x53))
+#define FSGNJ_S		(F7(0x10) | F3(0x0) | OPC(0x53))
+#define FSGNJN_S	(F7(0x10) | F3(0x1) | OPC(0x53))
+#define FSGNJX_S	(F7(0x10) | F3(0x2) | OPC(0x53))
+#define FSUB_S		(F7(0x4) | F3(0x7) | OPC(0x53))
+#define JAL		(OPC(0x6f))
+#define JALR		(F3(0x0) | OPC(0x67))
+#define LD		(F3(0x3) | OPC(0x3))
+#define LUI		(OPC(0x37))
+#define LW		(F3(0x2) | OPC(0x3))
+#define MUL		(F7(0x1) | F3(0x0) | OPC(0x33))
+#define MULH		(F7(0x1) | F3(0x1) | OPC(0x33))
+#define MULHU		(F7(0x1) | F3(0x3) | OPC(0x33))
+#define OR		(F7(0x0) | F3(0x6) | OPC(0x33))
+#define ORI		(F3(0x6) | OPC(0x13))
+#define REM		(F7(0x1) | F3(0x6) | OPC(0x33))
+#define REMU		(F7(0x1) | F3(0x7) | OPC(0x33))
+#define SD		(F3(0x3) | OPC(0x23))
+#define SLL		(F7(0x0) | F3(0x1) | OPC(0x33))
+#define SLLI		(IMM_I(0x0) | F3(0x1) | OPC(0x13))
+#define SLT		(F7(0x0) | F3(0x2) | OPC(0x33))
+#define SLTI		(F3(0x2) | OPC(0x13))
+#define SLTU		(F7(0x0) | F3(0x3) | OPC(0x33))
+#define SLTUI		(F3(0x3) | OPC(0x13))
+#define SRL		(F7(0x0) | F3(0x5) | OPC(0x33))
+#define SRLI		(IMM_I(0x0) | F3(0x5) | OPC(0x13))
+#define SRA		(F7(0x20) | F3(0x5) | OPC(0x33))
+#define SRAI		(IMM_I(0x400) | F3(0x5) | OPC(0x13))
+#define SUB		(F7(0x20) | F3(0x0) | OPC(0x33))
+#define SW		(F3(0x2) | OPC(0x23))
+#define XOR		(F7(0x0) | F3(0x4) | OPC(0x33))
+#define XORI		(F3(0x4) | OPC(0x13))
+
+/* Inclusive limits of the signed 12 bit immediate and of the byte
+   distances accepted for the short branch (PATCH_B) and jump (PATCH_J)
+   forms in detect_jump_type(). */
+#define SIMM_MAX	(0x7ff)
+#define SIMM_MIN	(-0x800)
+#define BRANCH_MAX	(0xfff)
+#define BRANCH_MIN	(-0x1000)
+#define JUMP_MAX	(0xfffff)
+#define JUMP_MIN	(-0x100000)
+
+#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
+/* Limits used to select the 32 / 44 / 52 bit constant and address
+   forms. The *_MAX values end in 0x7ff rather than 0xfff. */
+#define S32_MAX		(0x7ffff7ffl)
+#define S32_MIN		(-0x80000000l)
+#define S44_MAX		(0x7fffffff7ffl)
+#define S52_MAX		(0x7ffffffffffffl)
+#endif
+
+/* Appends one instruction word to the compiler buffer and increments
+   compiler->size. Fails when ensure_buf() returns NULL. */
+static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
+{
+	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, 
sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_sw imm) +{ + return push_inst(compiler, ins | IMM_S(imm)); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + + inst = (sljit_ins *)jump->addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + goto exit; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; + } + + diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset; + + if (jump->flags & IS_COND) { + inst--; + diff += SSIZE_OF(ins); + + if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) { + jump->flags |= PATCH_B; + inst[0] = (inst[0] & 0x1fff07f) ^ 0x1000; + jump->addr = (sljit_uw)inst; + return inst; + } + + inst++; + diff -= SSIZE_OF(ins); + } + + if (diff >= JUMP_MIN && diff <= JUMP_MAX) { + if (jump->flags & IS_COND) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; +#else + inst[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7; +#endif + } + + jump->flags |= PATCH_J; + return inst; + } + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (diff >= S32_MIN && diff <= S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_REL32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= (sljit_uw)S32_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS32; + inst[1] = inst[0]; + return inst + 1; + } + + if (target_addr <= S44_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; + + 
jump->flags |= PATCH_ABS44; + inst[3] = inst[0]; + return inst + 3; /* the jump instruction now lives at inst[3] */ + } + + if (target_addr <= S52_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS52; + inst[4] = inst[0]; + return inst + 4; + } +#endif + +exit: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[1] = inst[0]; + return inst + 1; +#else + inst[5] = inst[0]; + return inst + 5; +#endif +} + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + +static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label) +{ + if (max_label <= (sljit_uw)S32_MAX) { + put_label->flags = PATCH_ABS32; + return 1; + } + + if (max_label <= S44_MAX) { + put_label->flags = PATCH_ABS44; + return 3; + } + + if (max_label <= S52_MAX) { + put_label->flags = PATCH_ABS52; + return 4; + } + + put_label->flags = 0; + return 5; +} + +#endif /* SLJIT_CONFIG_RISCV_64 */ + +static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) +{ + struct sljit_jump *jump = NULL; + struct sljit_put_label *put_label; + sljit_uw flags; + sljit_ins *inst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_sw high; +#endif + sljit_uw addr; + + if (reg != 0) { + jump = (struct sljit_jump*)dst; + flags = jump->flags; + inst = (sljit_ins*)jump->addr; + addr = (flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + } else { + put_label = (struct sljit_put_label*)dst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + flags = put_label->flags; +#endif + inst = (sljit_ins*)put_label->addr; + addr = put_label->label->addr; + reg = *inst; + } + + if ((addr & 0x800) != 0) + addr += 0x1000; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); +#else /* !SLJIT_CONFIG_RISCV_32 */ + + if (flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= S32_MAX); + inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + } else if (flags & PATCH_ABS44) { + high = (sljit_sw)addr >> 12; + SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + } + + inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + inst += 2; + } else { + high = (sljit_sw)addr >> 32; + + if ((addr & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + + if (flags & PATCH_ABS52) { + SLJIT_ASSERT(addr <= S52_MAX); + inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); + } else { + inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff); + inst[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high); + inst++; + } + + inst[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + inst[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 
20 : 32); + inst[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3); + inst += 3; + } +#endif /* !SLJIT_CONFIG_RISCV_32 */ + + if (jump != NULL) { + SLJIT_ASSERT((inst[1] & 0x707f) == JALR); + inst[1] = (inst[1] & 0xfffff) | IMM_I(addr); + } else + inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw next_addr; + sljit_sw executable_offset; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + struct sljit_put_label *put_label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + next_addr = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + put_label = compiler->put_labels; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + if (next_addr == word_count) { + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + SLJIT_ASSERT(!put_label || put_label->addr >= word_count); + + /* These structures are ordered by their address. 
*/ + if (label && label->size == word_count) { + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + word_count += 1; +#else + word_count += 5; +#endif + jump->addr = (sljit_uw)code_ptr; + code_ptr = detect_jump_type(jump, code, executable_offset); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + if (put_label && put_label->addr == word_count) { + SLJIT_ASSERT(put_label->label); + put_label->addr = (sljit_uw)code_ptr; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + code_ptr += 1; + word_count += 1; +#else + code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size)); + word_count += 5; +#endif + put_label = put_label->next; + } + next_addr = compute_next_addr(label, jump, const_, put_label); + } + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = (sljit_uw)(code_ptr - code); + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(!put_label); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) { + load_addr_to_reg(jump, TMP_REG1); + break; + } + + addr = (jump->flags & JUMP_LABEL) ? 
jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); + + if (jump->flags & PATCH_B) { + SLJIT_ASSERT((sljit_sw)addr >= BRANCH_MIN && (sljit_sw)addr <= BRANCH_MAX); + addr = ((addr & 0x800) >> 4) | ((addr & 0x1e) << 7) | ((addr & 0x7e0) << 20) | ((addr & 0x1000) << 19); + buf_ptr[0] |= (sljit_ins)addr; + break; + } + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (jump->flags & PATCH_REL32) { + SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX); + + if ((addr & 0x800) != 0) + addr += 0x1000; + + buf_ptr[0] = AUIPC | RD(TMP_REG1) | (sljit_ins)((sljit_sw)addr & ~0xfff); + SLJIT_ASSERT((buf_ptr[1] & 0x707f) == JALR); + buf_ptr[1] |= IMM_I(addr); + break; + } +#endif + + SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX); + addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11); + buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? 
RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr; + } while (0); + jump = jump->next; + } + + put_label = compiler->put_labels; + while (put_label) { + load_addr_to_reg(put_label, 0); + put_label = put_label->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; + compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); + return code; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: + case SLJIT_HAS_ZERO_REGISTER: + return 1; + default: + return 0; + } +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_ORDERED_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. 
*/ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define ARG_TEST 0x00020 +#define ALT_KEEP_CACHE 0x00040 +#define CUMULATIVE_OP 0x00080 +#define IMM_OP 0x00100 +#define MOVE_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define UNUSED_DEST 0x00800 +#define REG_DEST 0x01000 +#define REG1_SOURCE 0x02000 +#define REG2_SOURCE 0x04000 +#define SLOW_SRC1 0x08000 +#define SLOW_SRC2 0x10000 +#define SLOW_DEST 0x20000 + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#include "sljitNativeRISCV_32.c" +#else +#include "sljitNativeRISCV_64.c" +#endif + +#define STACK_MAX_DISTANCE (-SIMM_MIN) + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + sljit_s32 i, tmp, offset; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + local_size += 
GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= STACK_MAX_DISTANCE) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + offset = local_size - SSIZE_OF(sw); + local_size = 0; + } else { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE))); + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3)); + offset = STACK_MAX_DISTANCE - SSIZE_OF(sw); + } + + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(RETURN_ADDR_REG), offset)); + + tmp = SLJIT_S0 - saveds; + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset)); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset)); + } + + if (local_size > STACK_MAX_DISTANCE) + FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG1))); + else if (local_size > 0) + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size))); + + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types > 0) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0))); + saved_arg_count++; + } + tmp++; + } + + arg_types >>= SLJIT_ARG_SHIFT; + } + + return SLJIT_SUCCESS; +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, + sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, + sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) { + if ((local_size & SSIZE_OF(sw)) != 0) + local_size += SSIZE_OF(sw); + local_size += 
GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); + } +#else + local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sizeof(sljit_f64)); +#endif + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf; + + return SLJIT_SUCCESS; +} + +#define STACK_MAX_DISTANCE (-SIMM_MIN - 16) + +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +{ + sljit_s32 i, tmp, offset; + sljit_s32 local_size = compiler->local_size; + + if (local_size > STACK_MAX_DISTANCE) { + local_size -= STACK_MAX_DISTANCE; + + if (local_size > STACK_MAX_DISTANCE) { + FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3)); + FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2))); + } else + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size))); + + local_size = STACK_MAX_DISTANCE; + } + + SLJIT_ASSERT(local_size > 0); + + offset = local_size - SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RS1(SLJIT_SP) | IMM_I(offset))); + + tmp = SLJIT_S0 - compiler->saveds; + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offset -= SSIZE_OF(sw); + FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + /* This alignment is valid because offset is not used after storing FPU regs. 
*/ + if ((offset & SSIZE_OF(sw)) != 0) + offset -= SSIZE_OF(sw); +#endif + + tmp = SLJIT_FS0 - compiler->fsaveds; + for (i = SLJIT_FS0; i > tmp; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { + offset -= SSIZE_OF(f64); + FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset))); + } + + return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)); +} + +#undef STACK_MAX_DISTANCE + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_void(compiler)); + + FAIL_IF(emit_stack_frame_release(compiler)); + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static const sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* u w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* u b s */ F3(0x0) | OPC(0x23) /* sb */, +/* u b l */ F3(0x4) | OPC(0x3) /* lbu */, +/* u h s */ F3(0x1) | OPC(0x23) /* sh */, +/* u h l */ F3(0x5) | OPC(0x3) /* lhu */, +/* u i s */ F3(0x2) | OPC(0x23) /* sw */, +/* u i l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x6) | OPC(0x3) /* lwu */), + +/* s w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */), +/* s w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */), +/* s b s */ F3(0x0) | OPC(0x23) /* sb */, +/* s b l */ F3(0x0) | OPC(0x3) /* lb */, +/* s h s */ F3(0x1) | OPC(0x23) /* sh */, 
+/* s h l */ F3(0x1) | OPC(0x3) /* lh */, +/* s i s */ F3(0x2) | OPC(0x23) /* sw */, +/* s i l */ F3(0x2) | OPC(0x3) /* lw */, + +/* d s */ F3(0x3) | OPC(0x27) /* fsd */, +/* d l */ F3(0x3) | OPC(0x7) /* fld */, +/* s s */ F3(0x2) | OPC(0x27) /* fsw */, +/* s l */ F3(0x2) | OPC(0x7) /* flw */, +}; + +#undef ARCH_32_64 + +static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 base, sljit_sw offset) +{ + sljit_ins ins; + + SLJIT_ASSERT(FAST_IS_REG(base) && offset <= 0xfff && offset >= SIMM_MIN); + + ins = data_transfer_insts[flags & MEM_MASK] | RS1(base); + if (flags & LOAD_DATA) + ins |= ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) | IMM_I(offset); + else + ins |= ((flags & MEM_MASK) <= GPR_REG ? RS2(reg) : FRS2(reg)) | IMM_S(offset); + + return push_inst(compiler, ins); +} + +/* Can perform an operation using at most 1 instruction. */ +static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absolute and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_mem_inst(compiler, flags, reg, arg & REG_MASK, argw)); + return -1; + } + return 0; +} + +#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0)) + +/* See getput_arg below. + Note: can_cache is called only for binary operators. */ +static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. 
*/ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) + || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + sljit_sw offset, argw_hi; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + /* Since tmp can be the same as base or offset registers, + * these might be unavailable after modifying tmp. */ + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + /* Using the cache. 
*/ + if (argw == compiler->cache_argw) { + if (arg == compiler->cache_arg) + return push_mem_inst(compiler, flags, reg, TMP_REG3, 0); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, TMP_REG3, 0); + } + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(TMP_REG3))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(OFFS_REG(arg)) | IMM_I(argw))); + } + + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3))); + tmp_r = TMP_REG3; + } + else + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? 
OFFS_REG(arg) : TMP_REG3))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) + return push_mem_inst(compiler, flags, reg, TMP_REG3, argw - compiler->cache_argw); + + if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= SIMM_MAX) && (argw - compiler->cache_argw >= SIMM_MIN)) { + offset = argw - compiler->cache_argw; + } else { + compiler->cache_arg = SLJIT_MEM; + + argw_hi = TO_ARGW_HI(argw); + + if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r)); + compiler->cache_argw = argw; + offset = 0; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r)); + compiler->cache_argw = argw_hi; + offset = argw & 0xfff; + argw = argw_hi; + } + } + + if (!base) + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, TMP_REG3, offset); + } + + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(TMP_REG3) | RS2(base))); + return push_mem_inst(compiler, flags, reg, tmp_r, offset); +} + +static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +{ + sljit_s32 base = arg & REG_MASK; + sljit_s32 tmp_r = TMP_REG1; + + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_r = reg; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if (SLJIT_UNLIKELY(argw)) { + FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(OFFS_REG(arg)) | IMM_I(argw))); + FAIL_IF(push_inst(compiler, ADD | 
RD(tmp_r) | RS1(base) | RS2(tmp_r))); + } + else + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(OFFS_REG(arg)))); + return push_mem_inst(compiler, flags, reg, tmp_r, 0); + } + + FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw), TMP_REG3)); + + if (base != 0) + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(tmp_r))); + + return push_mem_inst(compiler, flags, reg, tmp_r, argw & 0xfff); +} + +#undef TO_ARGW_HI + +static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +#define WORD 0 +#define IMM_EXTEND(v) (IMM_I(v)) +#else /* !SLJIT_CONFIG_RISCV_32 */ +#define WORD word +#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? 
(v) : (32 + (v)))) +#endif /* SLJIT_CONFIG_RISCV_32 */ + +#define EMIT_LOGICAL(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | RD(dst) | RS1(src1) | IMM_I(src2))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RS1(src1) | RS2(src2))); \ + } + +#define EMIT_SHIFT(op_imm, op_reg) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); \ + } \ + else { \ + if (op & SLJIT_SET_Z) \ + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \ + if (!(flags & UNUSED_DEST)) \ + FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(src2))); \ + } + +static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_s32 src1, sljit_sw src2) +{ + sljit_s32 is_overflow, is_carry, carry_src_r, is_handled; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; + + SLJIT_ASSERT(word == 0 || word == 0x8); +#endif /* SLJIT_CONFIG_RISCV_64 */ + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff)); + 
SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S8: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_U16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S16: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16))); + return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32))); + return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32)); + } + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; + + case SLJIT_MOV_S32: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) + return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0)); + SLJIT_ASSERT(dst == src2); + return SLJIT_SUCCESS; +#endif /* SLJIT_CONFIG_RISCV_64 */ + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + /* Nearly all 
instructions are unmovable in the following sequence. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(32))); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(src2) | IMM_I(32))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(32))); + } else { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(64))); + } +#endif /* SLJIT_CONFIG_RISCV_32 */ + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(6 * SSIZE_OF(ins)) << 7))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(0))); + FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(4 * SSIZE_OF(ins)) << 7))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(dst) | IMM_I(1))); + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1))); + FAIL_IF(push_inst(compiler, BGE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(0x1fc001d - 1 * SSIZE_OF(ins)) << 7))); + return SLJIT_SUCCESS; + + case SLJIT_ADD: + /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */ + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = OTHER_FLAG; + } + } + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (is_overflow || carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_ADDC: + carry_src_r = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2))); + } else { + if (carry_src_r != 0) { + if (src1 != dst) + carry_src_r = (sljit_s32)src1; + else if (src2 != dst) + carry_src_r = (sljit_s32)src2; + else { + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + carry_src_r = 
EQUAL_FLAG; + } + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */ + if (carry_src_r != 0) { + if (flags & SRC2_IMM) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(src2))); + else + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r))); + } + + FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (carry_src_r == 0) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG))); + /* Set carry flag. */ + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG)); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); 
+ break; + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + break; + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: + FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1))); + break; + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0))); + else + FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1))); + } + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2))); + + /* Only the zero flag is needed. */ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + else if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + + /* Only the zero flag is needed. 
*/ + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (!is_overflow) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG))); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0))); + FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31))); + return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG)); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); + + FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2))); + } + else { + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2))); + } + + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RS1(dst) | RS2(OTHER_FLAG))); + + FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG))); + + if (!is_carry) + return SLJIT_SUCCESS; + + return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(TMP_REG1)); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + + if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW) + return push_inst(compiler, MUL | WORD | RD(dst) | RS1(src1) | RS2(src2)); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (word) { + FAIL_IF(push_inst(compiler, MUL | RD(OTHER_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | 0x8 | RD(dst) | RS1(src1) | RS2(src2))); + return push_inst(compiler, SUB | RD(OTHER_FLAG) | 
RS1(dst) | RS2(OTHER_FLAG)); + } +#endif /* SLJIT_CONFIG_RISCV_64 */ + + FAIL_IF(push_inst(compiler, MULH | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); + FAIL_IF(push_inst(compiler, MUL | RD(dst) | RS1(src1) | RS2(src2))); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(31))); +#else /* !SLJIT_CONFIG_RISCV_32 */ + FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(63))); +#endif /* SLJIT_CONFIG_RISCV_32 */ + return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(OTHER_FLAG)); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(SLLI, SLL); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(SRLI, SRL); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(SRAI, SRA); + return SLJIT_SUCCESS; + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +#undef IMM_EXTEND + +static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ + sljit_s32 dst_r = TMP_REG2; + sljit_s32 src1_r; + sljit_sw src2_r = 0; + sljit_s32 sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (dst == TMP_REG2) { + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (flags & MOVE_OP) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + else if ((flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1_r = TMP_REG1; + } + else + src1_r = TMP_ZERO; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. 
*/ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) + dst_r = (sljit_s32)src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w, TMP_REG3)); + src2_r = sugg_src2_r; + } + else { + src2_r = TMP_ZERO; + if (flags & MOVE_OP) { + if (dst & SLJIT_MEM) + dst_r = 0; + else + op = SLJIT_MOV; + } + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) +{ +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_ins word = (op & SLJIT_32) >> 5; + + SLJIT_ASSERT(word == 0 || word == 0x8); +#endif /* 
SLJIT_CONFIG_RISCV_64 */ + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, EBREAK); + case SLJIT_NOP: + return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0)); + case SLJIT_LMUL_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULHU | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_LMUL_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0))); + FAIL_IF(push_inst(compiler, MULH | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1)); + case SLJIT_DIVMOD_UW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REMU | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIVMOD_SW: + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0))); + FAIL_IF(push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1))); + return push_inst(compiler, REM | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1)); + case SLJIT_DIV_UW: + return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_DIV_SW: + return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)); + case SLJIT_ENDBR: + case SLJIT_SKIP_FRAMES_BEFORE_RETURN: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +#undef WORD + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, 
dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) + flags = INT_DATA | SIGNED_DATA; +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + case SLJIT_MOV_U32: + case SLJIT_MOV_S32: + case SLJIT_MOV32: +#endif + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + case SLJIT_MOV_U32: + return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u32)srcw : srcw); + + case SLJIT_MOV_S32: + /* Logical operators have no W variant, so sign extended input is necessary for them. */ + case SLJIT_MOV32: + return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s32)srcw : srcw); +#endif + + case SLJIT_MOV_U8: + return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); + + case SLJIT_MOV_S8: + return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); + + case SLJIT_MOV_U16: + return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); + + case SLJIT_MOV_S16: + return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? 
(sljit_s16)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_XOR | (op & (SLJIT_32 | SLJIT_SET_Z)), flags, dst, dstw, src, srcw, SLJIT_IMM, -1); + + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_s32 flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (op & SLJIT_32) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_s32)src1w; + if (src2 & SLJIT_IMM) + src2w = (sljit_s32)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + compiler->status_flags_state = 0; + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 & SLJIT_IMM) { + if (op & SLJIT_32) + src2w &= 0x1f; + else + src2w &= 0x3f; + } +#endif + return emit_op(compiler, op, 
flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (op) { + case SLJIT_FAST_RETURN: + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDI | RD(RETURN_ADDR_REG) | RS1(src) | IMM_I(0))); + else + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + + return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0)); + case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: + return SLJIT_SUCCESS; + case SLJIT_PREFETCH_L1: + case SLJIT_PREFETCH_L2: + case SLJIT_PREFETCH_L3: + case SLJIT_PREFETCH_ONCE: + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return freg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_u32 size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* 
Floating point operators */ +/* --------------------------------------------------------------------- */ + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7)) +#define FMT(op) ((sljit_ins)((op & SLJIT_32) ^ SLJIT_32) << 17) + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# define flags (sljit_u32)0 +#else + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21; +#endif + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, FCVT_W_S | FMT(op) | flags | RD(dst_r) | FRS1(src))); + + /* Store the integer value from a VFP register. */ + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); +#else + return emit_op_mem2(compiler, flags ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0); +#endif + } + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inst; +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)) << 21; +#endif + + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#else + FAIL_IF(emit_op_mem2(compiler, (flags ? 
WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); +#endif + src = TMP_REG1; + } else if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) + srcw = (sljit_s32)srcw; +#endif + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3)); + src = TMP_REG1; + } + + inst = FCVT_S_W | FMT(op) | FRD(dst_r) | RS1(src); + +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + if (op & SLJIT_32) + inst |= F3(0x7); +#else + inst |= flags; + + if (op != SLJIT_CONV_F64_FROM_S32) + inst |= F3(0x7); +#endif + + FAIL_IF(push_inst(compiler, inst)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2, sljit_sw src2w) +{ + sljit_ins inst; + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + switch (GET_FLAG_TYPE(op)) { + case SLJIT_F_EQUAL: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_F_LESS: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2); + break; + case SLJIT_ORDERED_GREATER: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_F_GREATER: + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | 
FRS1(src1) | FRS2(src2); + break; + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_ORDERED_GREATER_EQUAL: + inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1); + break; + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */ + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2))); + FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1))); + inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1); + break; + default: /* SLJIT_UNORDERED, SLJIT_ORDERED */ + FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(TMP_FREG1) | FRS1(src1) | FRS2(src2))); + inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(TMP_FREG1) | FRS2(TMP_FREG1); + break; + } + + return push_inst(compiler, inst); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_32; + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1;
+	/* A memory source is loaded straight into dst_r, so src == dst_r afterwards. */
+	if (src & SLJIT_MEM) {
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
+		src = dst_r;
+	}
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV_F64:
+		if (src != dst_r) {
+			if (dst_r != TMP_FREG1)
+				FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
+			else
+				dst_r = src; /* No copy into TMP_FREG1: the store below reads src directly. */
+		}
+		break;
+	case SLJIT_NEG_F64:
+		FAIL_IF(push_inst(compiler, FSGNJN_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
+		break;
+	case SLJIT_ABS_F64:
+		FAIL_IF(push_inst(compiler, FSGNJX_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
+		break;
+	case SLJIT_CONV_F64_FROM_F32:
+		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
+		FAIL_IF(push_inst(compiler, FCVT_S_D | ((op & SLJIT_32) ? (1 << 25) : ((1 << 20) | F3(7))) | FRD(dst_r) | FRS1(src)));
+		op ^= SLJIT_32; /* Undo the flip made before the load so FLOAT_DATA(op) describes the destination. */
+		break;
+	}
+
+	if (dst & SLJIT_MEM)
+		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
+	return SLJIT_SUCCESS;
+}
+/* sljit_emit_fop2: emits dst = src1 <op> src2 for SLJIT_ADD_F64, SLJIT_SUB_F64,
+   SLJIT_MUL_F64 and SLJIT_DIV_F64 using FADD_S / FSUB_S / FMUL_S / FDIV_S
+   combined with FMT(op). Memory sources are loaded into TMP_FREG1 (src1) and
+   TMP_FREG2 (src2); a destination that is not a fast register is computed in
+   TMP_FREG2 and stored afterwards. Any other opcode emits no arithmetic
+   instruction. Returns SLJIT_SUCCESS, or the error of a failed emit step. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst, sljit_sw dstw,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	sljit_s32 dst_r, flags = 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+	ADJUST_LOCAL_OFFSET(src2, src2w);
+
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
+	/* Operands that getput_arg_fast() does not handle are only marked
+	   SLOW_SRCx here and are loaded by getput_arg() further down. */
+	if (src1 & SLJIT_MEM) {
+		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
+			FAIL_IF(compiler->error);
+			src1 = TMP_FREG1;
+		} else
+			flags |= SLOW_SRC1;
+	}
+
+	if (src2 & SLJIT_MEM) {
+		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
+			FAIL_IF(compiler->error);
+			src2 = TMP_FREG2;
+		} else
+			flags |= SLOW_SRC2;
+	}
+	/* Both operands are slow: the load order is picked with can_cache().
+	   NOTE(review): the last two getput_arg() arguments appear to name the
+	   operand accessed next (a caching hint) - confirm in getput_arg(). */
+	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
+		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+		}
+		else {
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
+			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+		}
+	}
+	else if (flags & SLOW_SRC1)
+		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
+	else if (flags & SLOW_SRC2)
+		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
+	/* The slow loads above targeted the temporary float registers. */
+	if (flags & SLOW_SRC1)
+		src1 = TMP_FREG1;
+	if (flags & SLOW_SRC2)
+		src2 = TMP_FREG2;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_ADD_F64:
+		FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
+		break;
+
+	case SLJIT_SUB_F64:
+		FAIL_IF(push_inst(compiler, FSUB_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
+		break;
+
+	case SLJIT_MUL_F64:
+		FAIL_IF(push_inst(compiler, FMUL_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
+		break;
+
+	case SLJIT_DIV_F64:
+		FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
+		break;
+	}
+	/* dst was not a fast register: write the result held in TMP_FREG2 to dst. */
+	if (dst_r == TMP_FREG2)
+		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
+
+	return SLJIT_SUCCESS;
+}
+
+#undef FLOAT_DATA
+#undef FMT
+
+/* --------------------------------------------------------------------- */
+/* Other instructions */
+/* --------------------------------------------------------------------- */
+/* sljit_emit_fast_enter: stores the value held in RETURN_ADDR_REG into dst.
+   A fast register dst receives it through ADDI dst, RETURN_ADDR_REG, 0; any
+   other dst is written by emit_op_mem() as WORD_DATA.
+   Returns the push_inst() / emit_op_mem() result. */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	if (FAST_IS_REG(dst))
+		return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0));
+
+	/* Memory. */
+	return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw);
+}
+
+/* --------------------------------------------------------------------- */
+/* Conditional instructions */
+/* --------------------------------------------------------------------- */
+/* sljit_emit_label: returns a label for the current position. The previous
+   label is returned again when compiler->size has not changed since it was
+   created; otherwise a new sljit_label is taken from ensure_abuf() and
+   initialised with set_label(). A failed allocation is handled by
+   PTR_FAIL_IF (presumably by returning NULL - confirm in its definition). */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
+{
+	struct sljit_label *label;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_label(compiler));
+
+	if (compiler->last_label && compiler->last_label->size == compiler->size)
+		return compiler->last_label;
+
+	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+	PTR_FAIL_IF(!label);
+	set_label(label, compiler);
+	return label;
+}
+/* BRANCH_LENGTH: a byte distance of 3 (RV32) or 7 (otherwise) instructions,
+   shifted left by 7 and OR-ed into the conditional branches below.
+   The count matches the branch itself, the JALR that follows it and the
+   1 (RV32) or 5 (otherwise) instruction slots reserved after the JALR. */
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+#define BRANCH_LENGTH ((sljit_ins)(3 * sizeof(sljit_ins)) << 7)
+#else
+#define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7)
+#endif
+/* sljit_emit_jump: creates a jump record and emits the jump sequence.
+   type: jump or condition type; the SLJIT_REWRITABLE_JUMP bit is passed to
+         set_jump() and only the low 8 bits select the sequence.
+   A conditional type first emits a branch on EQUAL_FLAG or OTHER_FLAG against
+   TMP_ZERO with a BRANCH_LENGTH offset and sets IS_COND; all types then emit
+   a JALR through TMP_REG1 and reserve extra instruction slots.
+   Returns the jump record; failures go through the PTR_FAIL_IF macro. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
+{
+	struct sljit_jump *jump;
+	sljit_ins inst;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_jump(compiler, type));
+
+	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+	PTR_FAIL_IF(!jump);
+	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+	type &= 0xff;
+	/* The emitted branch tests the opposite of the requested condition, e.g.
+	   SLJIT_EQUAL uses BNE EQUAL_FLAG, TMP_ZERO: when the condition does not
+	   hold, the branch skips BRANCH_LENGTH bytes, i.e. the jump emitted below. */
+	switch (type) {
+	case SLJIT_EQUAL:
+		inst = BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
+		break;
+	case SLJIT_NOT_EQUAL:
+		inst = BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH;
+		break;
+	case SLJIT_LESS:
+	case SLJIT_GREATER:
+	case SLJIT_SIG_LESS:
+	case SLJIT_SIG_GREATER:
+	case SLJIT_OVERFLOW:
+	case SLJIT_CARRY:
+	case SLJIT_F_EQUAL:
+	case SLJIT_ORDERED_EQUAL:
+	case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
+	case SLJIT_F_LESS:
+	case SLJIT_ORDERED_LESS:
+	case SLJIT_ORDERED_GREATER:
+	case SLJIT_F_LESS_EQUAL:
+	case SLJIT_ORDERED_LESS_EQUAL:
+	case SLJIT_ORDERED_GREATER_EQUAL:
+	case SLJIT_ORDERED:
+		inst = BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; /* Jump is taken when OTHER_FLAG != 0. */
+		break;
+	case SLJIT_GREATER_EQUAL:
+	case SLJIT_LESS_EQUAL:
+	case SLJIT_SIG_GREATER_EQUAL:
+	case SLJIT_SIG_LESS_EQUAL:
+	case SLJIT_NOT_OVERFLOW:
+	case SLJIT_NOT_CARRY:
+	case SLJIT_F_NOT_EQUAL:
+	case SLJIT_UNORDERED_OR_NOT_EQUAL:
+	case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
+	case SLJIT_F_GREATER_EQUAL:
+	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
+	case SLJIT_UNORDERED_OR_LESS_EQUAL:
+	case SLJIT_F_GREATER:
+	case SLJIT_UNORDERED_OR_GREATER:
+	case SLJIT_UNORDERED_OR_LESS:
+	case SLJIT_UNORDERED:
+		inst = BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO) | BRANCH_LENGTH; /* Jump is taken when OTHER_FLAG == 0. */
+		break;
+	default:
+		/* Not conditional branch. */
+		inst = 0;
+		break;
+	}
+
+	if (inst != 0) {
+		PTR_FAIL_IF(push_inst(compiler, inst));
+		jump->flags |= IS_COND;
+	}
+
+	jump->addr = compiler->size; /* Value of compiler->size just before the JALR is pushed. */
+	inst = JALR | RS1(TMP_REG1) | IMM_I(0);
+	/* Types from SLJIT_FAST_CALL upwards are calls: the JALR additionally
+	   names RETURN_ADDR_REG as its destination register. */
+	if (type >= SLJIT_FAST_CALL) {
+		jump->flags |= IS_CALL;
+		inst |= RD(RETURN_ADDR_REG);
+	}
+
+	PTR_FAIL_IF(push_inst(compiler, inst));
+
+	/* Maximum number of instructions required for generating a constant. */
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+	compiler->size += 1;
+#else
+	compiler->size += 5;
+#endif
+	return jump;
+}
+/* sljit_emit_call: emits a call by delegating to sljit_emit_jump().
+   arg_types is only passed to check_sljit_emit_call(). When type carries
+   SLJIT_CALL_RETURN, emit_stack_frame_release() is emitted first and the
+   type becomes SLJIT_JUMP, keeping only the SLJIT_REWRITABLE_JUMP bit.
+   Returns the sljit_emit_jump() result. */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 arg_types)
+{
+	SLJIT_UNUSED_ARG(arg_types);
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
+
+	if (type & SLJIT_CALL_RETURN) {
+		PTR_FAIL_IF(emit_stack_frame_release(compiler));
+		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
+	}
+
+	SLJIT_SKIP_CHECKS(compiler);
+	return sljit_emit_jump(compiler, type);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	struct sljit_jump *jump;
+	sljit_s32 flags;
+	sljit_ins inst;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+	ADJUST_LOCAL_OFFSET(src2, src2w);
+
+	compiler->cache_arg = 0;
+	compiler->cache_argw = 0;
+#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
+	flags = WORD_DATA | LOAD_DATA;
+#else /* !SLJIT_CONFIG_RISCV_32 */
+	flags = ((type & SLJIT_32) ?
INT_DATA : WORD_DATA) | LOAD_DATA; +#endif /* SLJIT_CONFIG_RISCV_32 */ + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + if (src1 & SLJIT_IMM) { + if (src1w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3)); + src1 = TMP_REG1; + } + else + src1 = TMP_ZERO; + } + + if (src2 & SLJIT_IMM) { + if (src2w != 0) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w, TMP_REG3)); + src2 = TMP_REG2; + } + else + src2 = TMP_ZERO; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND)); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + inst = BNE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_NOT_EQUAL: + inst = BEQ | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_LESS: + inst = BGEU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER_EQUAL: + inst = BLTU | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_GREATER: + inst = BGEU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_LESS_EQUAL: + inst = BLTU | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS: + inst = BGE | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLT | RS1(src1) | RS2(src2) | BRANCH_LENGTH; + break; + case SLJIT_SIG_GREATER: + inst = BGE | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLT | RS1(src2) | RS2(src1) | BRANCH_LENGTH; + break; + } + + PTR_FAIL_IF(push_inst(compiler, inst)); + + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating 
a constant. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + return jump; +} + +#undef BRANCH_LENGTH + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (!(src & SLJIT_IMM)) { + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0)); + } + + /* These jumps are converted to jump/call instructions when possible. */ + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0)); + jump->u.target = (sljit_uw)srcw; + + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0))); + + /* Maximum number of instructions required for generating a constant. 
*/ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 arg_types, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(arg_types); + CHECK_ERROR(); + CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + + if (type & SLJIT_CALL_RETURN) { + if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0))); + src = TMP_REG1; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + type = SLJIT_JUMP; + } + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 type) +{ + sljit_s32 src_r, dst_r, invert; + sljit_s32 saved_op = op; +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + sljit_s32 mem_type = WORD_DATA; +#else + sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + op = GET_OPCODE(op); + dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); + + if (type < SLJIT_F_EQUAL) { + src_r = OTHER_FLAG; + invert = type & 0x1; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(EQUAL_FLAG) | IMM_I(1))); + src_r = dst_r; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) { + src_r = OTHER_FLAG; + break; + } + FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(OTHER_FLAG) | IMM_I(1))); + src_r = dst_r; + invert ^= 0x1; + break; + } + } else { + invert = 0; + src_r = OTHER_FLAG; + + switch (type) { + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */ + case SLJIT_F_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: + case SLJIT_F_GREATER: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED: + invert = 1; + break; + } + } + + if (invert) { + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(src_r) | IMM_I(1))); + src_r = dst_r; + } + + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_r, dst, dstw); + + if (src_r != dst_r) + return push_inst(compiler, ADDI | RD(dst_r) | RS1(src_r) | IMM_I(0)); + return SLJIT_SUCCESS; + } + + mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + + if (dst & SLJIT_MEM) + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + 
CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, ADDI | RD(dst_r))); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +{ + struct sljit_put_label *put_label; + sljit_s32 dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); + PTR_FAIL_IF(!put_label); + set_put_label(put_label, compiler, 0); + + dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; + PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r)); +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + compiler->size += 1; +#else + compiler->size += 5; +#endif + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + + return put_label; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) +{ + sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); +} diff --git a/src/sljit/sljitNativeS390X.c b/src/sljit/sljitNativeS390X.c index 8eef910..be1ef43 100644 --- a/src/sljit/sljitNativeS390X.c +++ b/src/sljit/sljitNativeS390X.c @@ -220,7 +220,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t } /* fallthrough */ - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: return cc0; case SLJIT_NOT_EQUAL: @@ -234,13 +235,14 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t } /* fallthrough */ - case SLJIT_NOT_EQUAL_F64: + case SLJIT_UNORDERED_OR_NOT_EQUAL: return (cc1 | cc2 | cc3); case SLJIT_LESS: return cc1; case SLJIT_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: return (cc0 | cc2 | cc3); case SLJIT_GREATER: @@ -254,7 +256,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc0 | cc1 | cc2); case SLJIT_SIG_LESS: - case SLJIT_LESS_F64: + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: return cc1; case SLJIT_NOT_CARRY: @@ -263,7 +266,8 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t /* fallthrough */ case SLJIT_SIG_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return (cc0 | cc1); case SLJIT_CARRY: @@ -272,6 +276,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t /* fallthrough */ case SLJIT_SIG_GREATER: + case SLJIT_UNORDERED_OR_GREATER: /* Overflow is considered 
greater, see SLJIT_SUB. */ return cc2 | cc3; @@ -283,7 +288,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc2 | cc3); /* fallthrough */ - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return cc3; case SLJIT_NOT_OVERFLOW: @@ -291,14 +296,29 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t return (cc0 | cc1); /* fallthrough */ - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return (cc0 | cc1 | cc2); - case SLJIT_GREATER_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return (cc1 | cc2); + + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: return cc2; - case SLJIT_GREATER_EQUAL_F64: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: return (cc0 | cc2); + + case SLJIT_UNORDERED_OR_LESS_EQUAL: + return (cc0 | cc1 | cc3); + + case SLJIT_UNORDERED_OR_EQUAL: + return (cc0 | cc3); + + case SLJIT_UNORDERED_OR_LESS: + return (cc1 | cc3); } SLJIT_UNREACHABLE(); @@ -1628,6 +1648,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) return 0; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL); +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -1636,7 +1661,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_s32 offset, i, tmp; CHECK_ERROR(); @@ -1648,8 +1673,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset = 2 * SSIZE_OF(sw); if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { - 
FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ - offset += 9 * SSIZE_OF(sw); + if (saved_arg_count == 0) { + FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (8 - saved_arg_count) * SSIZE_OF(sw); + } } else { if (scratches == SLJIT_FIRST_SAVED_REG) { FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15))); @@ -1659,15 +1689,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); } - if (saveds == 0) { - FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); - offset += SSIZE_OF(sw); - } else { - FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); - offset += (saveds + 1) * SSIZE_OF(sw); + if (saved_arg_count == 0) { + if (saveds == 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } else if (saveds > saved_arg_count) { + if (saveds == saved_arg_count + 1) { + FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15))); + offset += (saveds - saved_arg_count) * SSIZE_OF(sw); + } } } + if (saved_arg_count > 0) { + FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset)); @@ -1684,15 +1729,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | 
R36A(r15) | R28A(r15) | disp_s20(-local_size))); + if (options & SLJIT_ENTER_REG_ARG) + return SLJIT_SUCCESS; + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; tmp = 0; while (arg_types > 0) { if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count)))); - tmp++; + FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp)))); + saved_arg_count++; } - word_arg_count++; + tmp++; } arg_types >>= SLJIT_ARG_SHIFT; @@ -1719,6 +1768,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) sljit_s32 local_size = compiler->local_size; sljit_s32 saveds = compiler->saveds; sljit_s32 scratches = compiler->scratches; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); if (is_u12(local_size)) FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size)); @@ -1727,8 +1777,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) offset = 2 * SSIZE_OF(sw); if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) { - FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */ - offset += 9 * SSIZE_OF(sw); + if (kept_saveds_count == 0) { + FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); + offset += 9 * SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (8 - kept_saveds_count) * SSIZE_OF(sw); + } } else { if (scratches == SLJIT_FIRST_SAVED_REG) { FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15))); @@ -1738,15 +1793,30 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw); } - if (saveds == 0) { - FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); - offset += SSIZE_OF(sw); - } else { - 
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); - offset += (saveds + 1) * SSIZE_OF(sw); + if (kept_saveds_count == 0) { + if (saveds == 0) { + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15))); + offset += (saveds + 1) * SSIZE_OF(sw); + } + } else if (saveds > kept_saveds_count) { + if (saveds == kept_saveds_count + 1) { + FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15))); + offset += SSIZE_OF(sw); + } else { + FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15))); + offset += (saveds - kept_saveds_count) * SSIZE_OF(sw); + } } } + if (kept_saveds_count > 0) { + FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15))); + offset += SSIZE_OF(sw); + } + tmp = SLJIT_FS0 - compiler->fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset)); @@ -2734,10 +2804,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w); } @@ -3117,6 +3184,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { + SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); @@ -3125,11 +3193,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile type = SLJIT_JUMP | (type & 
SLJIT_REWRITABLE_JUMP); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } @@ -3181,11 +3245,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi type = SLJIT_JUMP; } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); } @@ -3193,7 +3253,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co sljit_s32 dst, sljit_sw dstw, sljit_s32 type) { - sljit_u8 mask = get_cc(compiler, type & 0xff); + sljit_u8 mask = get_cc(compiler, type); CHECK_ERROR(); CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); @@ -3263,7 +3323,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil sljit_s32 dst_reg, sljit_s32 src, sljit_sw srcw) { - sljit_u8 mask = get_cc(compiler, type & 0xff); + sljit_u8 mask = get_cc(compiler, type); sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32); sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0; diff --git a/src/sljit/sljitNativeSPARC_32.c b/src/sljit/sljitNativeSPARC_32.c deleted file mode 100644 index 218992b..0000000 --- a/src/sljit/sljitNativeSPARC_32.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw imm) -{ - if (imm <= SIMM_MAX && imm >= SIMM_MIN) - return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); - - FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); - return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; -} - -#define ARG2(flags, src2) ((flags & SRC2_IMM) ? 
IMM(src2) : S2(src2)) - -static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, - sljit_s32 dst, sljit_s32 src1, sljit_sw src2) -{ - SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); - - switch (op) { - case SLJIT_MOV: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (dst != src2) - return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U8: - case SLJIT_MOV_S8: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - if (op == SLJIT_MOV_U8) - return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); - return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_MOV_U16: - case SLJIT_MOV_S16: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); - return push_inst(compiler, (op == SLJIT_MOV_S16 ? 
SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); - } - SLJIT_ASSERT(dst == src2); - return SLJIT_SUCCESS; - - case SLJIT_NOT: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags)); - - case SLJIT_CLZ: - SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); - - /* Loop. */ - FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); - FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS)); - return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); - - case SLJIT_ADD: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_ADDC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; - return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_SUB: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; - return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_SUBC: - compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; - return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_MUL: - compiler->status_flags_state = 0; - FAIL_IF(push_inst(compiler, SMUL | 
D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - if (!(flags & SET_FLAGS)) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); - return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); - - case SLJIT_AND: - return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_OR: - return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_XOR: - return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags)); - - case SLJIT_SHL: - FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - - case SLJIT_LSHR: - FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - - case SLJIT_ASHR: - FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); - return !(flags & SET_FLAGS) ? 
SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); - } - - SLJIT_UNREACHABLE(); - return SLJIT_SUCCESS; -} - -static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src) -{ - sljit_s32 reg_index = 8; - sljit_s32 word_reg_index = 8; - sljit_s32 float_arg_index = 1; - sljit_s32 double_arg_count = 0; - sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw); - sljit_s32 types = 0; - sljit_s32 reg = 0; - sljit_s32 move_to_tmp2 = 0; - - if (src) - reg = reg_map[*src & REG_MASK]; - - arg_types >>= SLJIT_ARG_SHIFT; - - while (arg_types) { - types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK); - - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - float_arg_index++; - double_arg_count++; - if (reg_index == reg || reg_index + 1 == reg) - move_to_tmp2 = 1; - reg_index += 2; - break; - case SLJIT_ARG_TYPE_F32: - float_arg_index++; - if (reg_index == reg) - move_to_tmp2 = 1; - reg_index++; - break; - default: - if (reg_index != word_reg_index && reg_index == reg) - move_to_tmp2 = 1; - reg_index++; - word_reg_index++; - break; - } - - arg_types >>= SLJIT_ARG_SHIFT; - } - - if (move_to_tmp2) { - if (reg < 14) - FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1))); - *src = TMP_REG1; - } - - arg_types = types; - - while (arg_types) { - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - float_arg_index--; - if (float_arg_index == 4 && double_arg_count == 4) { - /* The address is not doubleword aligned, so two instructions are required to store the double. 
*/ - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS)); - } - else - FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset -= sizeof(sljit_f64); - break; - case SLJIT_ARG_TYPE_F32: - float_arg_index--; - FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset -= sizeof(sljit_f64); - break; - default: - break; - } - - arg_types >>= SLJIT_ARG_SHIFT; - } - - float_offset = (16 + 6) * sizeof(sljit_sw); - - while (types) { - switch (types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - reg_index -= 2; - if (reg_index < 14) { - if ((reg_index & 0x1) != 0) { - FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - if (reg_index < 8 + 6 - 1) - FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1)); - } - else - FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - } - float_offset -= sizeof(sljit_f64); - break; - case SLJIT_ARG_TYPE_F32: - reg_index--; - if (reg_index < 8 + 6) - FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index)); - float_offset -= sizeof(sljit_f64); - break; - default: - reg_index--; - word_reg_index--; - - if (reg_index != word_reg_index) { - if (reg_index < 14) - FAIL_IF(push_inst(compiler, OR | DA(reg_index) | S1(0) | S2A(word_reg_index), reg_index)); - else - FAIL_IF(push_inst(compiler, STW | DA(word_reg_index) | S1(SLJIT_SP) | IMM(92), word_reg_index)); - } - break; - } - - types >>= SLJIT_ARG_SHIFT; - } - - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 
dst, sljit_sw init_value) -{ - FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); - return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) -{ - sljit_ins *inst = (sljit_ins *)addr; - SLJIT_UNUSED_ARG(executable_offset); - - SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0); - SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000)); - inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff); - inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff); - SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1); - inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); - SLJIT_CACHE_FLUSH(inst, inst + 2); -} - -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) -{ - sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset); -} diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c deleted file mode 100644 index c8d19e1..0000000 --- a/src/sljit/sljitNativeSPARC_common.c +++ /dev/null @@ -1,1673 +0,0 @@ -/* - * Stack-less Just-In-Time compiler - * - * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT - * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN - * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) -{ - return "SPARC" SLJIT_CPUINFO; -} - -/* Length of an instruction word - Both for sparc-32 and sparc-64 */ -typedef sljit_u32 sljit_ins; - -#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) - -static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) -{ -#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590 - __asm ( - /* if (from == to) return */ - "cmp %i0, %i1\n" - "be .leave\n" - "nop\n" - - /* loop until from >= to */ - ".mainloop:\n" - "flush %i0\n" - "add %i0, 8, %i0\n" - "cmp %i0, %i1\n" - "bcs .mainloop\n" - "nop\n" - - /* The comparison was done above. */ - "bne .leave\n" - /* nop is not necessary here, since the - sub operation has no side effect. */ - "sub %i0, 4, %i0\n" - "flush %i0\n" - ".leave:" - ); -#else - if (SLJIT_UNLIKELY(from == to)) - return; - - do { - __asm__ volatile ( - "flush %0\n" - : : "r"(from) - ); - /* Operates at least on doubleword. */ - from += 2; - } while (from < to); - - if (from == to) { - /* Flush the last word. 
*/ - from --; - __asm__ volatile ( - "flush %0\n" - : : "r"(from) - ); - } -#endif -} - -#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */ - -/* TMP_REG2 is not used by getput_arg */ -#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) -#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) -#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) -/* This register is modified by calls, which affects the instruction - in the delay slot if it is used as a source register. */ -#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) - -#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) -#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) - -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15 -}; - -static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { - 0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 -}; - -/* --------------------------------------------------------------------- */ -/* Instrucion forms */ -/* --------------------------------------------------------------------- */ - -#define D(d) ((sljit_ins)reg_map[d] << 25) -#define FD(d) ((sljit_ins)freg_map[d] << 25) -#define FDN(d) (((sljit_ins)freg_map[d] | 0x1) << 25) -#define DA(d) ((sljit_ins)(d) << 25) -#define S1(s1) ((sljit_ins)reg_map[s1] << 14) -#define FS1(s1) ((sljit_ins)freg_map[s1] << 14) -#define S1A(s1) ((sljit_ins)(s1) << 14) -#define S2(s2) ((sljit_ins)reg_map[s2]) -#define FS2(s2) ((sljit_ins)freg_map[s2]) -#define FS2N(s2) ((sljit_ins)freg_map[s2] | 0x1) -#define S2A(s2) ((sljit_ins)(s2)) -#define IMM_ARG 0x2000 -#define DOP(op) ((sljit_ins)(op) << 5) -#define IMM(imm) (((sljit_ins)(imm) & 0x1fff) | IMM_ARG) - -#define DR(dr) (reg_map[dr]) -#define DRF(dr, flags) ((sljit_s32)(reg_map[dr] | ((flags) & SET_FLAGS))) -#define OPC1(opcode) ((sljit_ins)(opcode) << 30) -#define OPC2(opcode) ((sljit_ins)(opcode) << 22) -#define OPC3(opcode) 
((sljit_ins)(opcode) << 19) -#define SET_FLAGS OPC3(0x10) - -#define ADD (OPC1(0x2) | OPC3(0x00)) -#define ADDC (OPC1(0x2) | OPC3(0x08)) -#define AND (OPC1(0x2) | OPC3(0x01)) -#define ANDN (OPC1(0x2) | OPC3(0x05)) -#define CALL (OPC1(0x1)) -#define FABSS (OPC1(0x2) | OPC3(0x34) | DOP(0x09)) -#define FADDD (OPC1(0x2) | OPC3(0x34) | DOP(0x42)) -#define FADDS (OPC1(0x2) | OPC3(0x34) | DOP(0x41)) -#define FCMPD (OPC1(0x2) | OPC3(0x35) | DOP(0x52)) -#define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) -#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) -#define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) -#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2)) -#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6)) -#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8)) -#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4)) -#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) -#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) -#define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) -#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) -#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9)) -#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1)) -#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) -#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) -#define JMPL (OPC1(0x2) | OPC3(0x38)) -#define LDD (OPC1(0x3) | OPC3(0x03)) -#define LDDF (OPC1(0x3) | OPC3(0x23)) -#define LDF (OPC1(0x3) | OPC3(0x20)) -#define LDUW (OPC1(0x3) | OPC3(0x00)) -#define NOP (OPC1(0x0) | OPC2(0x04)) -#define OR (OPC1(0x2) | OPC3(0x02)) -#define ORN (OPC1(0x2) | OPC3(0x06)) -#define RDY (OPC1(0x2) | OPC3(0x28) | S1A(0)) -#define RESTORE (OPC1(0x2) | OPC3(0x3d)) -#define SAVE (OPC1(0x2) | OPC3(0x3c)) -#define SETHI (OPC1(0x0) | OPC2(0x04)) -#define SLL (OPC1(0x2) | OPC3(0x25)) -#define SLLX (OPC1(0x2) | OPC3(0x25) | (1 << 12)) -#define SRA (OPC1(0x2) | OPC3(0x27)) -#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) -#define SRL (OPC1(0x2) | OPC3(0x26)) -#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) -#define STD (OPC1(0x3) | 
OPC3(0x07)) -#define STDF (OPC1(0x3) | OPC3(0x27)) -#define STF (OPC1(0x3) | OPC3(0x24)) -#define STW (OPC1(0x3) | OPC3(0x04)) -#define SUB (OPC1(0x2) | OPC3(0x04)) -#define SUBC (OPC1(0x2) | OPC3(0x0c)) -#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) -#define WRY (OPC1(0x2) | OPC3(0x30) | DA(0)) -#define XOR (OPC1(0x2) | OPC3(0x03)) -#define XNOR (OPC1(0x2) | OPC3(0x07)) - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define MAX_DISP (0x1fffff) -#define MIN_DISP (-0x200000) -#define DISP_MASK ((sljit_ins)0x3fffff) - -#define BICC (OPC1(0x0) | OPC2(0x2)) -#define FBFCC (OPC1(0x0) | OPC2(0x6)) -#define SLL_W SLL -#define SDIV (OPC1(0x2) | OPC3(0x0f)) -#define SMUL (OPC1(0x2) | OPC3(0x0b)) -#define UDIV (OPC1(0x2) | OPC3(0x0e)) -#define UMUL (OPC1(0x2) | OPC3(0x0a)) -#else -#define SLL_W SLLX -#endif - -#define SIMM_MAX (0x0fff) -#define SIMM_MIN (-0x1000) - -/* dest_reg is the absolute name of the register - Useful for reordering instructions in the delay slot. */ -static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 delay_slot) -{ - sljit_ins *ptr; - SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS - || (delay_slot & DST_INS_MASK) == MOVABLE_INS - || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f)); - ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); - FAIL_IF(!ptr); - *ptr = ins; - compiler->size++; - compiler->delay_slot = delay_slot; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) -{ - sljit_sw diff; - sljit_uw target_addr; - sljit_ins *inst; - sljit_ins saved_inst; - - if (jump->flags & SLJIT_REWRITABLE_JUMP) - return code_ptr; - - if (jump->flags & JUMP_ADDR) - target_addr = jump->u.target; - else { - SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; - } - inst = (sljit_ins*)jump->addr; - -#if 
(defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (jump->flags & IS_CALL) { - /* Call is always patchable on sparc 32. */ - jump->flags |= PATCH_CALL; - if (jump->flags & IS_MOVABLE) { - inst[0] = inst[-1]; - inst[-1] = CALL; - jump->addr -= sizeof(sljit_ins); - return inst; - } - inst[0] = CALL; - inst[1] = NOP; - return inst + 1; - } -#else - /* Both calls and BPr instructions shall not pass this point. */ -#error "Implementation required" -#endif - - if (jump->flags & IS_COND) - inst--; - - diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2; - - if (jump->flags & IS_MOVABLE) { - if (diff <= MAX_DISP && diff >= MIN_DISP) { - jump->flags |= PATCH_B; - inst--; - if (jump->flags & IS_COND) { - saved_inst = inst[0]; - inst[0] = inst[1] ^ (1 << 28); - inst[1] = saved_inst; - } else { - inst[1] = inst[0]; - inst[0] = BICC | DA(0x8); - } - jump->addr = (sljit_uw)inst; - return inst + 1; - } - } - - diff += SSIZE_OF(ins); - - if (diff <= MAX_DISP && diff >= MIN_DISP) { - jump->flags |= PATCH_B; - if (jump->flags & IS_COND) - inst[0] ^= (1 << 28); - else - inst[0] = BICC | DA(0x8); - inst[1] = NOP; - jump->addr = (sljit_uw)inst; - return inst + 1; - } - - return code_ptr; -} - -SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) -{ - struct sljit_memory_fragment *buf; - sljit_ins *code; - sljit_ins *code_ptr; - sljit_ins *buf_ptr; - sljit_ins *buf_end; - sljit_uw word_count; - sljit_uw next_addr; - sljit_sw executable_offset; - sljit_sw addr; - - struct sljit_label *label; - struct sljit_jump *jump; - struct sljit_const *const_; - struct sljit_put_label *put_label; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_generate_code(compiler)); - reverse_buf(compiler); - - code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data); - PTR_FAIL_WITH_EXEC_IF(code); - buf = compiler->buf; - - code_ptr = code; - word_count = 0; - next_addr = 0; - executable_offset = 
SLJIT_EXEC_OFFSET(code); - - label = compiler->labels; - jump = compiler->jumps; - const_ = compiler->consts; - put_label = compiler->put_labels; - - do { - buf_ptr = (sljit_ins*)buf->memory; - buf_end = buf_ptr + (buf->used_size >> 2); - do { - *code_ptr = *buf_ptr++; - if (next_addr == word_count) { - SLJIT_ASSERT(!label || label->size >= word_count); - SLJIT_ASSERT(!jump || jump->addr >= word_count); - SLJIT_ASSERT(!const_ || const_->addr >= word_count); - SLJIT_ASSERT(!put_label || put_label->addr >= word_count); - - /* These structures are ordered by their address. */ - if (label && label->size == word_count) { - /* Just recording the address. */ - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = (sljit_uw)(code_ptr - code); - label = label->next; - } - if (jump && jump->addr == word_count) { -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - jump->addr = (sljit_uw)(code_ptr - 3); -#else - jump->addr = (sljit_uw)(code_ptr - 6); -#endif - code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); - jump = jump->next; - } - if (const_ && const_->addr == word_count) { - /* Just recording the address. 
*/ - const_->addr = (sljit_uw)code_ptr; - const_ = const_->next; - } - if (put_label && put_label->addr == word_count) { - SLJIT_ASSERT(put_label->label); - put_label->addr = (sljit_uw)code_ptr; - put_label = put_label->next; - } - next_addr = compute_next_addr(label, jump, const_, put_label); - } - code_ptr ++; - word_count ++; - } while (buf_ptr < buf_end); - - buf = buf->next; - } while (buf); - - if (label && label->size == word_count) { - label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - label->size = (sljit_uw)(code_ptr - code); - label = label->next; - } - - SLJIT_ASSERT(!label); - SLJIT_ASSERT(!jump); - SLJIT_ASSERT(!const_); - SLJIT_ASSERT(!put_label); - SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size); - - jump = compiler->jumps; - while (jump) { - do { - addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); - buf_ptr = (sljit_ins *)jump->addr; - - if (jump->flags & PATCH_CALL) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000); - buf_ptr[0] = CALL | ((sljit_ins)addr & 0x3fffffff); - break; - } - if (jump->flags & PATCH_B) { - addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; - SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP); - buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK); - break; - } - - /* Set the fields of immediate loads. 
*/ -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); - buf_ptr[0] |= (sljit_ins)(addr >> 10) & 0x3fffff; - buf_ptr[1] |= (sljit_ins)addr & 0x3ff; -#else -#error "Implementation required" -#endif - } while (0); - jump = jump->next; - } - - put_label = compiler->put_labels; - while (put_label) { - addr = (sljit_sw)put_label->label->addr; - buf_ptr = (sljit_ins *)put_label->addr; - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000)); - buf_ptr[0] |= (addr >> 10) & 0x3fffff; - buf_ptr[1] |= addr & 0x3ff; -#else -#error "Implementation required" -#endif - put_label = put_label->next; - } - - compiler->error = SLJIT_ERR_COMPILED; - compiler->executable_offset = executable_offset; - compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins); - - code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); - code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - - SLJIT_CACHE_FLUSH(code, code_ptr); - SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1); - return code; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) -{ - switch (feature_type) { - case SLJIT_HAS_FPU: -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - /* Available by default. */ - return 1; -#endif - - case SLJIT_HAS_ZERO_REGISTER: - return 1; - -#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) - case SLJIT_HAS_CMOV: - return 1; -#endif - - default: - return 0; - } -} - -/* --------------------------------------------------------------------- */ -/* Entry, exit */ -/* --------------------------------------------------------------------- */ - -/* Creates an index in data_transfer_insts array. 
*/ -#define LOAD_DATA 0x01 -#define WORD_DATA 0x00 -#define BYTE_DATA 0x02 -#define HALF_DATA 0x04 -#define INT_DATA 0x06 -#define SIGNED_DATA 0x08 -/* Separates integer and floating point registers */ -#define GPR_REG 0x0f -#define DOUBLE_DATA 0x10 -#define SINGLE_DATA 0x12 - -#define MEM_MASK 0x1f - -#define ARG_TEST 0x00020 -#define ALT_KEEP_CACHE 0x00040 -#define CUMULATIVE_OP 0x00080 -#define IMM_OP 0x00100 -#define MOVE_OP 0x00200 -#define SRC2_IMM 0x00400 - -#define REG_DEST 0x00800 -#define REG2_SOURCE 0x01000 -#define SLOW_SRC1 0x02000 -#define SLOW_SRC2 0x04000 -#define SLOW_DEST 0x08000 - -/* SET_FLAGS (0x10 << 19) also belong here! */ - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#include "sljitNativeSPARC_32.c" -#else -#include "sljitNativeSPARC_64.c" -#endif - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - sljit_s32 reg_index, types, tmp; - sljit_u32 float_offset, args_offset; - sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index; - - CHECK_ERROR(); - CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; - compiler->local_size = local_size; - - if (local_size <= -SIMM_MIN) { - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); - } - else { - FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); - FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); - } - - arg_types >>= SLJIT_ARG_SHIFT; - - types = arg_types; - float_offset = 16 * sizeof(sljit_sw); - reg_index = 24; - - while (types && reg_index < 24 + 6) { - switch (types & 
SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - if (reg_index & 0x1) { - FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - if (reg_index >= 24 + 6 - 1) - break; - FAIL_IF(push_inst(compiler, STW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), MOVABLE_INS)); - } else - FAIL_IF(push_inst(compiler, STD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - - float_offset += sizeof(sljit_f64); - reg_index++; - break; - case SLJIT_ARG_TYPE_F32: - FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - float_offset += sizeof(sljit_f64); - break; - } - - reg_index++; - types >>= SLJIT_ARG_SHIFT; - } - - args_offset = (16 + 1 + 6) * sizeof(sljit_sw); - float_offset = 16 * sizeof(sljit_sw); - reg_index = 24; - saved_arg_index = 24; - scratch_arg_index = 8 - 1; - float_arg_index = 1; - - while (arg_types) { - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - if (reg_index < 24 + 6 - 1) { - FAIL_IF(push_inst(compiler, LDDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - } else if (reg_index < 24 + 6) { - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset), MOVABLE_INS)); - } else { - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset + sizeof(sljit_sw)), MOVABLE_INS)); - } - - float_arg_index++; - float_offset += sizeof(sljit_f64); - reg_index++; - break; - case SLJIT_ARG_TYPE_F32: - if (reg_index < 24 + 6) - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS)); - else - FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), 
MOVABLE_INS)); - float_arg_index++; - float_offset += sizeof(sljit_f64); - break; - default: - scratch_arg_index++; - - if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - tmp = saved_arg_index++; - if (tmp == reg_index) - break; - } else - tmp = scratch_arg_index; - - if (reg_index < 24 + 6) - FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp)); - else - FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp)); - break; - } - - reg_index++; - arg_types >>= SLJIT_ARG_SHIFT; - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler, - sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, - sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) -{ - CHECK_ERROR(); - CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); - set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - - compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_return_void(compiler)); - - FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(SLJIT_R0) | S2(0), UNMOVABLE_INS); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - - if (TYPE_CAST_NEEDED(op) || !FAST_IS_REG(src)) { - FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - src = SLJIT_R0; - } - - FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); - return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); -} - -/* 
--------------------------------------------------------------------- */ -/* Operators */ -/* --------------------------------------------------------------------- */ - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) -#define ARCH_32_64(a, b) a -#else -#define ARCH_32_64(a, b) b -#endif - -static const sljit_ins data_transfer_insts[16 + 4] = { -/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), -/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), -/* u b s */ OPC1(3) | OPC3(0x05) /* stb */, -/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */, -/* u h s */ OPC1(3) | OPC3(0x06) /* sth */, -/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */, -/* u i s */ OPC1(3) | OPC3(0x04) /* stw */, -/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */, - -/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), -/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), -/* s b s */ OPC1(3) | OPC3(0x05) /* stb */, -/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */, -/* s h s */ OPC1(3) | OPC3(0x06) /* sth */, -/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */, -/* s i s */ OPC1(3) | OPC3(0x04) /* stw */, -/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */), - -/* d s */ OPC1(3) | OPC3(0x27), -/* d l */ OPC1(3) | OPC3(0x23), -/* s s */ OPC1(3) | OPC3(0x24), -/* s l */ OPC1(3) | OPC3(0x20), -}; - -#undef ARCH_32_64 - -/* Can perform an operation using at most 1 instruction. */ -static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) -{ - SLJIT_ASSERT(arg & SLJIT_MEM); - - if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) - || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) { - /* Works for both absoulte and relative addresses (immediate case). 
*/ - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] - | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)) - | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)), - ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); - return -1; - } - return 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); - - /* Simple operation except for updates. */ - if (arg & OFFS_REG_MASK) { - argw &= 0x3; - SLJIT_ASSERT(argw); - next_argw &= 0x3; - if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw) - return 1; - return 0; - } - - if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) - return 1; - return 0; -} - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - sljit_s32 base, arg2, delay_slot; - sljit_ins dest; - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(next_arg & SLJIT_MEM)) { - next_arg = 0; - next_argw = 0; - } - - base = arg & REG_MASK; - if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - argw &= 0x3; - - /* Using the cache. 
*/ - if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw)) - arg2 = TMP_REG3; - else { - if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) { - compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); - compiler->cache_argw = argw; - arg2 = TMP_REG3; - } - else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg)) - arg2 = reg; - else /* It must be a mov operation, so tmp1 must be free to use. */ - arg2 = TMP_REG1; - FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | (sljit_ins)argw, DR(arg2))); - } - } - else { - /* Using the cache. */ - if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { - if (argw != compiler->cache_argw) { - FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); - compiler->cache_argw = argw; - } - arg2 = TMP_REG3; - } else { - if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) { - compiler->cache_arg = SLJIT_MEM; - compiler->cache_argw = argw; - arg2 = TMP_REG3; - } - else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base) - arg2 = reg; - else /* It must be a mov operation, so tmp1 must be free to use. */ - arg2 = TMP_REG1; - FAIL_IF(load_immediate(compiler, arg2, argw)); - } - } - - dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)); - delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? 
DR(reg) : MOVABLE_INS; - if (!base) - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); -} - -static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. 
*/ - sljit_s32 dst_r = TMP_REG2; - sljit_s32 src1_r; - sljit_sw src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; - - if (!(flags & ALT_KEEP_CACHE)) { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - } - - if (dst != TMP_REG2) { - if (FAST_IS_REG(dst)) { - dst_r = dst; - flags |= REG_DEST; - if (flags & MOVE_OP) - sugg_src2_r = dst_r; - } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) - flags |= SLOW_DEST; - } - - if (flags & IMM_OP) { - if ((src2 & SLJIT_IMM) && src2w) { - if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) { - flags |= SRC2_IMM; - src2_r = src2w; - } - } - if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { - if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) { - flags |= SRC2_IMM; - src2_r = src1w; - - /* And swap arguments. */ - src1 = src2; - src1w = src2w; - src2 = SLJIT_IMM; - /* src2w = src2_r unneeded. */ - } - } - } - - /* Source 1. */ - if (FAST_IS_REG(src1)) - src1_r = src1; - else if (src1 & SLJIT_IMM) { - if (src1w) { - FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); - src1_r = TMP_REG1; - } - else - src1_r = 0; - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC1; - src1_r = TMP_REG1; - } - - /* Source 2. 
*/ - if (FAST_IS_REG(src2)) { - src2_r = src2; - flags |= REG2_SOURCE; - if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP) - dst_r = src2_r; - } - else if (src2 & SLJIT_IMM) { - if (!(flags & SRC2_IMM)) { - if (src2w) { - FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); - src2_r = sugg_src2_r; - } - else { - src2_r = 0; - if (flags & MOVE_OP) { - if (dst & SLJIT_MEM) - dst_r = 0; - else - op = SLJIT_MOV; - } - } - } - } - else { - if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC2; - src2_r = sugg_src2_r; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - SLJIT_ASSERT(src2_r == TMP_REG2); - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); - - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); - - if (dst & SLJIT_MEM) { - if (!(flags & SLOW_DEST)) { - getput_arg_fast(compiler, flags, dst_r, dst, dstw); - return compiler->error; - } - return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_op0(compiler, op)); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - return push_inst(compiler, TA, UNMOVABLE_INS); - case 
SLJIT_NOP: - return push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_LMUL_UW: - case SLJIT_LMUL_SW: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); - return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); -#else -#error "Implementation required" -#endif - case SLJIT_DIVMOD_UW: - case SLJIT_DIVMOD_SW: - case SLJIT_DIV_UW: - case SLJIT_DIV_SW: - SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if ((op | 0x2) == SLJIT_DIV_UW) - FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); - else { - FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); - } - if (op <= SLJIT_DIVMOD_SW) - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); - if (op >= SLJIT_DIV_UW) - return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); - return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)); -#else -#error "Implementation required" -#endif - case SLJIT_ENDBR: - case SLJIT_SKIP_FRAMES_BEFORE_RETURN: - return SLJIT_SUCCESS; - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - sljit_u32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_MOV: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - case SLJIT_MOV_U32: - case SLJIT_MOV_S32: - case SLJIT_MOV32: -#endif - case SLJIT_MOV_P: - return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw); - - case SLJIT_MOV_U8: - return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw); - - case SLJIT_MOV_S8: - return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw); - - case SLJIT_MOV_U16: - return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw); - - case SLJIT_MOV_S16: - return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw); - - case SLJIT_NOT: - case SLJIT_CLZ: - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - sljit_u32 flags = HAS_FLAGS(op) ? 
SET_FLAGS : 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w)); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_ADD: - case SLJIT_ADDC: - case SLJIT_MUL: - case SLJIT_AND: - case SLJIT_OR: - case SLJIT_XOR: - return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SUB: - case SLJIT_SUBC: - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - - case SLJIT_SHL: - case SLJIT_LSHR: - case SLJIT_ASHR: -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (src2 & SLJIT_IMM) - src2w &= 0x1f; -#else - SLJIT_UNREACHABLE(); -#endif - return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 src, sljit_sw srcw) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_op_src(compiler, op, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); - - switch (op) { - case SLJIT_FAST_RETURN: - if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK))); - else - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw)); - - FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS)); - return 
push_inst(compiler, NOP, UNMOVABLE_INS); - case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN: - case SLJIT_PREFETCH_L1: - case SLJIT_PREFETCH_L2: - case SLJIT_PREFETCH_L3: - case SLJIT_PREFETCH_ONCE: - return SLJIT_SUCCESS; - } - - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) -{ - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); - return freg_map[reg]; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, - void *instruction, sljit_u32 size) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); - - return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); -} - -/* --------------------------------------------------------------------- */ -/* Floating point operators */ -/* --------------------------------------------------------------------- */ - -#define FLOAT_DATA(op) ((sljit_ins)DOUBLE_DATA | (((sljit_ins)(op) & SLJIT_32) >> 7)) -#define SELECT_FOP(op, single, double) ((op & SLJIT_32) ? 
single : double) -#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) - -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); - src = TMP_FREG1; - } - - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | FD(TMP_FREG1) | FS2(src), MOVABLE_INS)); - - if (FAST_IS_REG(dst)) { - FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); - return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); - } - - /* Store the integer value from a VFP register. */ - return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); -} - -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - - if (src & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; -#endif - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - srcw = 0; - } - - if (FAST_IS_REG(src)) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); - src = SLJIT_MEM1(SLJIT_SP); - srcw = FLOAT_TMP_MEM_OFFSET; - } - - FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | FD(dst_r) | FS2(TMP_FREG1), MOVABLE_INS)); - - if (dst & SLJIT_MEM) - return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - if (src1 & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - src1 = TMP_FREG1; - } - - if (src2 & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); - src2 = TMP_FREG2; - } - - return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | FS1(src1) | FS2(src2), FCC_IS_SET | MOVABLE_INS); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw) -{ - sljit_s32 dst_r; - - CHECK_ERROR(); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); - SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); - - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_32; - - dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1; - - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); - src = dst_r; - } - - switch (GET_OPCODE(op)) { - case SLJIT_MOV_F64: - if (src != dst_r) { - if (dst_r != TMP_FREG1) { - FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (!(op & SLJIT_32)) - FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); - } - else - dst_r = src; - } - break; - case SLJIT_NEG_F64: - FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_32)) - FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); - break; - case SLJIT_ABS_F64: - FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS)); - if (dst_r != src && !(op & SLJIT_32)) - FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS)); - break; - case SLJIT_CONV_F64_FROM_F32: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS)); - op ^= SLJIT_32; - break; - } - - if (dst & SLJIT_MEM) - FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); - return SLJIT_SUCCESS; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - sljit_s32 dst_r, flags = 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); - ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src1, src1w); - ADJUST_LOCAL_OFFSET(src2, src2w); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2; - - if (src1 & SLJIT_MEM) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1 = TMP_FREG1; - } else - flags |= SLOW_SRC1; - } - - if (src2 & SLJIT_MEM) { - if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { - FAIL_IF(compiler->error); - src2 = TMP_FREG2; - } else - flags |= SLOW_SRC2; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); - - if (flags & SLOW_SRC1) - src1 = TMP_FREG1; - if (flags & SLOW_SRC2) - src2 = TMP_FREG2; - - switch (GET_OPCODE(op)) { - case SLJIT_ADD_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); - break; - - case SLJIT_SUB_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); - break; - - case SLJIT_MUL_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); - break; - - case SLJIT_DIV_F64: - FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | FD(dst_r) | FS1(src1) | FS2(src2), MOVABLE_INS)); - break; - } - - if (dst_r == TMP_FREG2) - FAIL_IF(emit_op_mem2(compiler, 
FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); - - return SLJIT_SUCCESS; -} - -#undef FLOAT_DATA -#undef SELECT_FOP - -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); - ADJUST_LOCAL_OFFSET(dst, dstw); - - if (FAST_IS_REG(dst)) - return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), UNMOVABLE_INS); - - /* Memory. */ - FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw)); - compiler->delay_slot = UNMOVABLE_INS; - return SLJIT_SUCCESS; -} - -/* --------------------------------------------------------------------- */ -/* Conditional instructions */ -/* --------------------------------------------------------------------- */ - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) -{ - struct sljit_label *label; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_label(compiler)); - - if (compiler->last_label && compiler->last_label->size == compiler->size) - return compiler->last_label; - - label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); - PTR_FAIL_IF(!label); - set_label(label, compiler); - compiler->delay_slot = UNMOVABLE_INS; - return label; -} - -static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) -{ - switch (type) { - case SLJIT_EQUAL: - case SLJIT_NOT_EQUAL_F64: /* Unordered. */ - return DA(0x1); - - case SLJIT_NOT_EQUAL: - case SLJIT_EQUAL_F64: - return DA(0x9); - - case SLJIT_LESS: - case SLJIT_GREATER_F64: /* Unordered. 
*/ - case SLJIT_CARRY: - return DA(0x5); - - case SLJIT_GREATER_EQUAL: - case SLJIT_LESS_EQUAL_F64: - case SLJIT_NOT_CARRY: - return DA(0xd); - - case SLJIT_GREATER: - case SLJIT_GREATER_EQUAL_F64: /* Unordered. */ - return DA(0xc); - - case SLJIT_LESS_EQUAL: - case SLJIT_LESS_F64: - return DA(0x4); - - case SLJIT_SIG_LESS: - return DA(0x3); - - case SLJIT_SIG_GREATER_EQUAL: - return DA(0xb); - - case SLJIT_SIG_GREATER: - return DA(0xa); - - case SLJIT_SIG_LESS_EQUAL: - return DA(0x2); - - case SLJIT_OVERFLOW: - if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) - return DA(0x9); - /* fallthrough */ - - case SLJIT_UNORDERED_F64: - return DA(0x7); - - case SLJIT_NOT_OVERFLOW: - if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB))) - return DA(0x1); - /* fallthrough */ - - case SLJIT_ORDERED_F64: - return DA(0xf); - - default: - SLJIT_UNREACHABLE(); - return DA(0x8); - } -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type) -{ - struct sljit_jump *jump; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_jump(compiler, type)); - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - type &= 0xff; - - if (type < SLJIT_EQUAL_F64) { - jump->flags |= IS_COND; - if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) - jump->flags |= IS_MOVABLE; -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); -#else -#error "Implementation required" -#endif - } - else if (type < SLJIT_JUMP) { - jump->flags |= IS_COND; - if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) - jump->flags |= IS_MOVABLE; -#if (defined SLJIT_CONFIG_SPARC_32 && 
SLJIT_CONFIG_SPARC_32) - PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type ^ 1) | 5, UNMOVABLE_INS)); -#else -#error "Implementation required" -#endif - } - else { - if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) - jump->flags |= IS_MOVABLE; - if (type >= SLJIT_FAST_CALL) - jump->flags |= IS_CALL; - } - - PTR_FAIL_IF(emit_const(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG1) | IMM(0), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - - return jump; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 arg_types) -{ - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); - - PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - - return sljit_emit_jump(compiler, type); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) -{ - struct sljit_jump *jump = NULL; - sljit_s32 src_r; - - CHECK_ERROR(); - CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (FAST_IS_REG(src)) - src_r = src; - else if (src & SLJIT_IMM) { - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - FAIL_IF(!jump); - set_jump(jump, compiler, JUMP_ADDR); - jump->u.target = (sljit_uw)srcw; - - if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) - jump->flags |= IS_MOVABLE; - if (type >= SLJIT_FAST_CALL) - jump->flags |= IS_CALL; - - FAIL_IF(emit_const(compiler, TMP_REG1, 0)); - src_r = TMP_REG1; - } - else { - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); - src_r = TMP_REG1; - } - - FAIL_IF(push_inst(compiler, 
JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); - if (jump) - jump->addr = compiler->size; - return push_inst(compiler, NOP, UNMOVABLE_INS); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 arg_types, - sljit_s32 src, sljit_sw srcw) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); - - if (src & SLJIT_MEM) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw)); - src = TMP_REG1; - } - - FAIL_IF(call_with_args(compiler, arg_types, &src)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - - return sljit_emit_ijump(compiler, type, src, srcw); -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, - sljit_s32 dst, sljit_sw dstw, - sljit_s32 type) -{ - sljit_s32 reg; - sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; - - CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); - ADJUST_LOCAL_OFFSET(dst, dstw); - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - op = GET_OPCODE(op); - reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2; - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); - - type &= 0xff; - if (type < SLJIT_EQUAL_F64) - FAIL_IF(push_inst(compiler, BICC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); - else - FAIL_IF(push_inst(compiler, FBFCC | get_cc(compiler, type) | 3, UNMOVABLE_INS)); - - FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); - - if (op >= SLJIT_ADD) { - flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; - if (dst & SLJIT_MEM) - return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); - return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0); - } - - if (!(dst & SLJIT_MEM)) - return SLJIT_SUCCESS; - - return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); -#else -#error "Implementation required" -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) -{ - CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - -#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; -#else -#error "Implementation required" -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) -{ - struct sljit_const *const_; - sljit_s32 dst_r; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); - ADJUST_LOCAL_OFFSET(dst, dstw); - - const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); - PTR_FAIL_IF(!const_); - set_const(const_, compiler); - - dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG2; - PTR_FAIL_IF(emit_const(compiler, dst_r, init_value)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return const_; -} - -SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) -{ - struct sljit_put_label *put_label; - sljit_s32 dst_r; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw)); - ADJUST_LOCAL_OFFSET(dst, dstw); - - put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label)); - PTR_FAIL_IF(!put_label); - set_put_label(put_label, compiler, 0); - - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; - PTR_FAIL_IF(emit_const(compiler, dst_r, 0)); - - if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); - return put_label; -} diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c index b9a7b39..6773c71 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/src/sljit/sljitNativeX86_32.c @@ -80,21 +80,28 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw if (b & SLJIT_MEM) { if (!(b & REG_MASK)) inst_size += sizeof(sljit_sw); - else if (immb != 0 && !(b & OFFS_REG_MASK)) { - /* Immediate operand. */ - if (immb <= 127 && immb >= -128) - inst_size += sizeof(sljit_s8); - else - inst_size += sizeof(sljit_sw); + else { + if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_s8); + else + inst_size += sizeof(sljit_sw); + } + else if (reg_map[b & REG_MASK] == 5) { + /* Swap registers if possible. 
*/ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if (b & OFFS_REG_MASK) + inst_size += 1; /* SIB byte. */ } - else if (reg_map[b & REG_MASK] == 5) - inst_size += sizeof(sljit_s8); - - if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) - b |= TO_OFFS_REG(SLJIT_SP); - - if (b & OFFS_REG_MASK) - inst_size += 1; /* SIB byte. */ } /* Calculate size of a. */ @@ -109,7 +116,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw else if (flags & EX86_SHIFT_INS) { imma &= 0x1f; if (imma != 1) { - inst_size ++; + inst_size++; flags |= EX86_BYTE_ARG; } } else if (flags & EX86_BYTE_ARG) @@ -165,7 +172,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } else if (b & REG_MASK) { reg_map_b = reg_map[b & REG_MASK]; - if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) { + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0 || reg_map_b == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; @@ -190,8 +197,14 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } } else { + if (reg_map_b == 5) + *buf_ptr |= 0x40; + *buf_ptr++ |= 0x04; *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6)); + + if (reg_map_b == 5) + *buf_ptr++ = 0; } } else { @@ -243,12 +256,8 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ return code_ptr; } -#define ENTER_R2_USED 0x00001 -#define ENTER_R2_TO_S 0x00002 -#define ENTER_R2_TO_R0 0x00004 -#define ENTER_R1_TO_S 0x00008 -#define ENTER_TMP_TO_R4 0x00010 -#define ENTER_TMP_TO_S 0x00020 +#define ENTER_TMP_TO_R4 0x00001 +#define ENTER_TMP_TO_S 0x00002 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, @@ -256,6 +265,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_s32 word_arg_count, saved_arg_count, float_arg_count; sljit_s32 size, locals_offset, args_size, types, status; + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_u8 *inst; #ifdef _WIN32 sljit_s32 r2_offset = -1; @@ -271,64 +281,72 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start); arg_types >>= SLJIT_ARG_SHIFT; - types = arg_types; word_arg_count = 0; - saved_arg_count = 0; - float_arg_count = 0; - args_size = SSIZE_OF(sw); status = 0; - while (types) { - switch (types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - float_arg_count++; - FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); - args_size += SSIZE_OF(f64); - break; - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); - args_size += SSIZE_OF(f32); - break; - default: - word_arg_count++; - if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - saved_arg_count++; - if (saved_arg_count == 4) - status |= ENTER_TMP_TO_S; - } else { - if (word_arg_count == 4) + if (options & SLJIT_ENTER_REG_ARG) { + args_size = 0; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + word_arg_count++; + if (word_arg_count >= 4) { status |= ENTER_TMP_TO_R4; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count == 3) - status |= ENTER_R2_USED; -#endif + args_size = SSIZE_OF(sw); + } } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL)) - break; -#endif - - args_size += SSIZE_OF(sw); - break; + arg_types >>= SLJIT_ARG_SHIFT; } - types >>= 
SLJIT_ARG_SHIFT; + } else { + types = arg_types; + saved_arg_count = 0; + float_arg_count = 0; + args_size = SSIZE_OF(sw); + while (types) { + switch (types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f64); + break; + case SLJIT_ARG_TYPE_F32: + float_arg_count++; + FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size)); + args_size += SSIZE_OF(f32); + break; + default: + word_arg_count++; + + if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) + saved_arg_count++; + + if (word_arg_count == 4) { + if (types & SLJIT_ARG_TYPE_SCRATCH_REG) { + status |= ENTER_TMP_TO_R4; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + } else if (saved_arg_count == 4) { + status |= ENTER_TMP_TO_S; + arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT); + } + } + + args_size += SSIZE_OF(sw); + break; + } + types >>= SLJIT_ARG_SHIFT; + } + + args_size -= SSIZE_OF(sw); } - args_size -= SSIZE_OF(sw); compiler->args_size = args_size; /* [esp+0] for saving temporaries and function calls. */ locals_offset = 2 * SSIZE_OF(sw); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) - locals_offset = 4 * SSIZE_OF(sw); -#else if (scratches >= 3) locals_offset = 4 * SSIZE_OF(sw); -#endif compiler->scratches_offset = locals_offset; @@ -340,39 +358,37 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi compiler->locals_offset = locals_offset; - size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); - inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); - FAIL_IF(!inst); + size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? 
saveds : 3) - kept_saveds_count; + if (!(options & SLJIT_ENTER_REG_ARG)) + size++; - INC_SIZE((sljit_uw)size); - PUSH_REG(reg_map[TMP_REG1]); - if (saveds > 2 || scratches > 9) - PUSH_REG(reg_map[SLJIT_S2]); - if (saveds > 1 || scratches > 10) - PUSH_REG(reg_map[SLJIT_S1]); - if (saveds > 0 || scratches > 11) - PUSH_REG(reg_map[SLJIT_S0]); + if (size != 0) { + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1)); + FAIL_IF(!inst); - size *= SSIZE_OF(sw); + INC_SIZE((sljit_uw)size); + + if (!(options & SLJIT_ENTER_REG_ARG)) + PUSH_REG(reg_map[TMP_REG1]); + + if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9) + PUSH_REG(reg_map[SLJIT_S2]); + if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10) + PUSH_REG(reg_map[SLJIT_S1]); + if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11) + PUSH_REG(reg_map[SLJIT_S0]); + + size *= SSIZE_OF(sw); + } if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size); size += SSIZE_OF(sw); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(options & SLJIT_ENTER_CDECL)) - size += args_size; -#endif - local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size; compiler->local_size = local_size; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(options & SLJIT_ENTER_CDECL)) - size -= args_size; -#endif - word_arg_count = 0; saved_arg_count = 0; args_size = size; @@ -386,64 +402,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi break; default: word_arg_count++; + SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)))); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) { - if (word_arg_count == 1) { - if (status & ENTER_R2_USED) { - EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? 
SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0); - } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - status |= ENTER_R2_TO_S; - saved_arg_count++; - } else - status |= ENTER_R2_TO_R0; - } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - status |= ENTER_R1_TO_S; - saved_arg_count++; - } - break; - } -#endif if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) { - SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4)); - - if (word_arg_count <= 3) { #ifdef _WIN32 - if (word_arg_count == 3 && local_size > 4 * 4096) - r2_offset = local_size + args_size; - else + if (word_arg_count == 3 && local_size > 4 * 4096) + r2_offset = local_size + args_size; + else #endif - EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); - } - } else { - SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S)); + EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); - if (saved_arg_count <= 3) - EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); + } else { + EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size); saved_arg_count++; } + args_size += SSIZE_OF(sw); break; } arg_types >>= SLJIT_ARG_SHIFT; } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(options & SLJIT_ENTER_CDECL)) { - if (status & ENTER_R2_TO_R0) - EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0); - - saved_arg_count = 0; - if (status & ENTER_R2_TO_S) { - EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0); - saved_arg_count++; - } - - if (status & ENTER_R1_TO_S) - EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0); - } -#endif - SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); #ifdef _WIN32 @@ -459,6 +438,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3); } else { + if (options & SLJIT_ENTER_REG_ARG) { + SLJIT_ASSERT(r2_offset == -1); + + inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1)); + FAIL_IF(!inst); + 
INC_SIZE(1); + PUSH_REG(reg_map[SLJIT_R2]); + + local_size -= SSIZE_OF(sw); + r2_offset = local_size; + } + EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12); BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096); @@ -490,8 +481,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #endif /* _WIN32 */ + locals_offset -= SSIZE_OF(sw); + kept_saveds_count = SLJIT_R3 - kept_saveds_count; + + while (saved_arg_count > 3) { + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), locals_offset, kept_saveds_count, 0); + kept_saveds_count++; + locals_offset -= SSIZE_OF(sw); + saved_arg_count--; + } + if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) { - size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw); + size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : locals_offset; EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0); } @@ -503,9 +504,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 args_size, locals_offset; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - sljit_s32 word_arg_count = 0; -#endif CHECK_ERROR(); CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); @@ -513,25 +511,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp arg_types >>= SLJIT_ARG_SHIFT; args_size = 0; - while (arg_types) { - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - args_size += SSIZE_OF(f64); - break; - case SLJIT_ARG_TYPE_F32: - args_size += SSIZE_OF(f32); - break; - default: -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (word_arg_count >= 2) + + if (!(options & SLJIT_ENTER_REG_ARG)) { + while (arg_types) { + switch (arg_types & SLJIT_ARG_MASK) { + case SLJIT_ARG_TYPE_F64: + args_size += SSIZE_OF(f64); + break; + case 
SLJIT_ARG_TYPE_F32: + args_size += SSIZE_OF(f32); + break; + default: args_size += SSIZE_OF(sw); - word_arg_count++; -#else - args_size += SSIZE_OF(sw); -#endif - break; + break; + } + arg_types >>= SLJIT_ARG_SHIFT; } - arg_types >>= SLJIT_ARG_SHIFT; } compiler->args_size = args_size; @@ -539,13 +534,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp /* [esp+0] for saving temporaries and function calls. */ locals_offset = 2 * SSIZE_OF(sw); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if ((options & SLJIT_ENTER_CDECL) && scratches >= 3) - locals_offset = 4 * SSIZE_OF(sw); -#else if (scratches >= 3) locals_offset = 4 * SSIZE_OF(sw); -#endif compiler->scratches_offset = locals_offset; @@ -557,12 +547,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp compiler->locals_offset = locals_offset; - saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw); + saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(options & SLJIT_ENTER_CDECL)) - saveds += args_size; -#endif + if (!(options & SLJIT_ENTER_REG_ARG)) + saveds += SSIZE_OF(sw); compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds; return SLJIT_SUCCESS; @@ -570,30 +558,44 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) { + sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + sljit_s32 saveds; sljit_uw size; sljit_u8 *inst; - size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + - (compiler->saveds <= 3 ? compiler->saveds : 3)); + BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); + + size = (sljit_uw)((compiler->scratches > 9 ? 
(compiler->scratches - 9) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count); + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + size++; + + if (size == 0) + return SLJIT_SUCCESS; + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); INC_SIZE(size); - if (compiler->saveds > 0 || compiler->scratches > 11) + saveds = compiler->saveds; + + if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) POP_REG(reg_map[SLJIT_S0]); - if (compiler->saveds > 1 || compiler->scratches > 10) + if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) POP_REG(reg_map[SLJIT_S1]); - if (compiler->saveds > 2 || compiler->scratches > 9) + if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) POP_REG(reg_map[SLJIT_S2]); - POP_REG(reg_map[TMP_REG1]); + + if (!(compiler->options & SLJIT_ENTER_REG_ARG)) + POP_REG(reg_map[TMP_REG1]); return SLJIT_SUCCESS; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) { - sljit_uw size; sljit_u8 *inst; CHECK_ERROR(); @@ -602,27 +604,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler SLJIT_ASSERT(compiler->args_size >= 0); SLJIT_ASSERT(compiler->local_size > 0); - BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); - FAIL_IF(emit_stack_frame_release(compiler)); - size = 1; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) - size = 3; -#endif - inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); - - INC_SIZE(size); - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) { - RET_I16(U8(compiler->args_size)); - return SLJIT_SUCCESS; - } -#endif - + INC_SIZE(1); RET(); return SLJIT_SUCCESS; } @@ -631,114 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_return_void(struct sljit_compiler /* Call / return instructions */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - -static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) -{ - sljit_sw stack_size = 0; - sljit_s32 word_arg_count = 0; - - arg_types >>= SLJIT_ARG_SHIFT; - - while (arg_types) { - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - stack_size += SSIZE_OF(f64); - break; - case SLJIT_ARG_TYPE_F32: - stack_size += SSIZE_OF(f32); - break; - default: - word_arg_count++; - if (word_arg_count > 2) - stack_size += SSIZE_OF(sw); - break; - } - - arg_types >>= SLJIT_ARG_SHIFT; - } - - if (word_arg_count_ptr) - *word_arg_count_ptr = word_arg_count; - - return stack_size; -} - -static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler, - sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args) -{ - sljit_u8 *inst; - sljit_s32 float_arg_count; - - if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - PUSH_REG(reg_map[SLJIT_R2]); - } - else if (stack_size > 0) { - if (word_arg_count >= 4) - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); - - BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0); - - stack_size = 0; - arg_types >>= SLJIT_ARG_SHIFT; - word_arg_count = 0; - float_arg_count = 0; - while (arg_types) { - switch (arg_types & SLJIT_ARG_MASK) { - case SLJIT_ARG_TYPE_F64: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += SSIZE_OF(f64); - break; - case SLJIT_ARG_TYPE_F32: - float_arg_count++; - FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count)); - stack_size += SSIZE_OF(f32); - break; - default: - word_arg_count++; - if (word_arg_count == 
3) { - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0); - stack_size += SSIZE_OF(sw); - } - else if (word_arg_count == 4) { - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0); - stack_size += SSIZE_OF(sw); - } - break; - } - - arg_types >>= SLJIT_ARG_SHIFT; - } - } - - if (word_arg_count > 0) { - if (swap_args) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - - *inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]); - } - else { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); - FAIL_IF(!inst); - INC_SIZE(2); - - *inst++ = MOV_r_rm; - *inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]); - } - } - - return SLJIT_SUCCESS; -} - -#endif - -static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) +static sljit_s32 call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr) { sljit_sw stack_size = 0; sljit_s32 word_arg_count = 0; @@ -771,7 +650,7 @@ static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, slji return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf); } -static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler, +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count) { sljit_s32 float_arg_count = 0; @@ -840,21 +719,19 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, sljit_s32 *extra_space, sljit_s32 arg_types, sljit_s32 src, sljit_sw srcw) { - sljit_sw args_size, prev_args_size, saved_regs_size; + sljit_sw args_size, saved_regs_size; sljit_sw types, word_arg_count, float_arg_count; sljit_sw stack_size, prev_stack_size, min_size, offset; sljit_sw word_arg4_offset; sljit_u8 r2_offset = 0; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL; -#endif + sljit_s32 
kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); sljit_u8* inst; ADJUST_LOCAL_OFFSET(src, srcw); CHECK_EXTRA_REGS(src, srcw, (void)0); saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) - + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw); + + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw); word_arg_count = 0; float_arg_count = 0; @@ -876,30 +753,15 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, break; default: word_arg_count++; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!fast_call || word_arg_count > 2) - args_size += SSIZE_OF(sw); -#else args_size += SSIZE_OF(sw); -#endif break; } arg_types >>= SLJIT_ARG_SHIFT; } - if (args_size <= compiler->args_size -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call) -#endif /* SLJIT_X86_32_FASTCALL */ - && 1) { -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - *extra_space = fast_call ? 
0 : args_size; - prev_args_size = compiler->args_size; - stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; -#else /* !SLJIT_X86_32_FASTCALL */ + if (args_size <= compiler->args_size) { *extra_space = 0; stack_size = args_size + SSIZE_OF(sw) + saved_regs_size; -#endif /* SLJIT_X86_32_FASTCALL */ offset = stack_size + compiler->local_size; @@ -911,37 +773,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw); } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(compiler->options & SLJIT_ENTER_CDECL)) { - if (!fast_call) - offset -= SSIZE_OF(sw); - - if (word_arg_count >= 3) { - word_arg4_offset = SSIZE_OF(sw); - - if (word_arg_count + float_arg_count >= 4) { - word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw); - if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) - word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64); - } - - /* In cdecl mode, at least one more word value must - * be present on the stack before the return address. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0); - } - - if (fast_call) { - if (args_size < prev_args_size) { - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw)); - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0); - } - } else if (prev_args_size > 0) { - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size); - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); - } - } -#endif /* SLJIT_X86_32_FASTCALL */ - while (types != 0) { switch (types & SLJIT_ARG_MASK) { case SLJIT_ARG_TYPE_F64: @@ -957,12 +788,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, default: switch (word_arg_count) { case 1: -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (fast_call) { - EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? 
SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); - break; - } -#endif offset -= SSIZE_OF(sw); if (r2_offset != 0) { EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); @@ -971,10 +796,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); break; case 2: -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (fast_call) - break; -#endif offset -= SSIZE_OF(sw); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); break; @@ -993,15 +814,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, types >>= SLJIT_ARG_SHIFT; } - BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0); - FAIL_IF(emit_stack_frame_release(compiler)); - -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (args_size < prev_args_size) - BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0); -#endif - - return SLJIT_SUCCESS; + return emit_stack_frame_release(compiler); } stack_size = args_size + SSIZE_OF(sw); @@ -1014,13 +827,7 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, if (word_arg_count >= 3) stack_size += SSIZE_OF(sw); - prev_args_size = 0; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (!(compiler->options & SLJIT_ENTER_CDECL)) - prev_args_size = compiler->args_size; -#endif - - prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size; + prev_stack_size = SSIZE_OF(sw) + saved_regs_size; min_size = prev_stack_size + compiler->local_size; word_arg4_offset = compiler->scratches_offset; @@ -1050,75 +857,30 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, } /* Restore saved registers. 
*/ - offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw); + offset = stack_size - 2 * SSIZE_OF(sw); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset); if (compiler->saveds > 2 || compiler->scratches > 9) { offset -= SSIZE_OF(sw); EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset); } - if (compiler->saveds > 1 || compiler->scratches > 10) { + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) { offset -= SSIZE_OF(sw); EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset); } - if (compiler->saveds > 0 || compiler->scratches > 11) { + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) { offset -= SSIZE_OF(sw); EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset); } /* Copy fourth argument and return address. */ -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (fast_call) { - offset = stack_size; - *extra_space = 0; + offset = stack_size - SSIZE_OF(sw); + *extra_space = args_size; - if (word_arg_count >= 4 && prev_args_size == 0) { - offset -= SSIZE_OF(sw); - inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - - SLJIT_ASSERT(args_size != prev_args_size); - } else { - if (word_arg_count >= 4) { - offset -= SSIZE_OF(sw); - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); - } - - if (args_size != prev_args_size) - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); - } - - if (args_size != prev_args_size) - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0); - } else { -#endif /* SLJIT_X86_32_FASTCALL */ - offset = stack_size - SSIZE_OF(sw); - *extra_space = args_size; - - if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) { - offset -= SSIZE_OF(sw); - inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset); - FAIL_IF(!inst); - *inst = 
XCHG_r_rm; - - SLJIT_ASSERT(prev_args_size > 0); - } else { - if (word_arg_count >= 4) { - offset -= SSIZE_OF(sw); - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); - } - - if (prev_args_size > 0) - EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw)); - } - - /* Copy return address. */ - if (prev_args_size > 0) - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0); -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (word_arg_count >= 4) { + offset -= SSIZE_OF(sw); + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0); } -#endif /* SLJIT_X86_32_FASTCALL */ while (types != 0) { switch (types & SLJIT_ARG_MASK) { @@ -1135,12 +897,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, default: switch (word_arg_count) { case 1: -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (fast_call) { - EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0); - break; - } -#endif offset -= SSIZE_OF(sw); if (r2_offset != 0) { EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0); @@ -1149,10 +905,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0); break; case 2: -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if (fast_call) - break; -#endif offset -= SSIZE_OF(sw); EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0); break; @@ -1168,12 +920,6 @@ static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler, types >>= SLJIT_ARG_SHIFT; } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - /* Skip return address. 
*/ - if (fast_call) - offset -= SSIZE_OF(sw); -#endif - SLJIT_ASSERT(offset >= 0); if (offset == 0) @@ -1198,6 +944,41 @@ static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 e return SLJIT_SUCCESS; } +static sljit_s32 call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 is_tail) +{ + sljit_s32 word_arg_count = 0; + sljit_s32 kept_saveds_count, offset; + + arg_types >>= SLJIT_ARG_SHIFT; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) + word_arg_count++; + + arg_types >>= SLJIT_ARG_SHIFT; + } + + if (word_arg_count < 4) + return SLJIT_SUCCESS; + + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset); + + if (!is_tail) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0); + + kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + offset = compiler->local_size + SSIZE_OF(sw); + + if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) + offset += SSIZE_OF(sw); + if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) + offset += SSIZE_OF(sw); + + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0); +} + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types) { @@ -1209,18 +990,21 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 1)); + PTR_FAIL_IF(emit_stack_frame_release(compiler)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); + } + stack_size = type; 
PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); - if (stack_size == 0) { - type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); - return sljit_emit_jump(compiler, type); - } + if (stack_size == 0) + return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP)); jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); @@ -1229,32 +1013,17 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile return jump; } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - if ((type & 0xff) == SLJIT_CALL) { - stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); - PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0)); + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + PTR_FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 0)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - - jump = sljit_emit_jump(compiler, type); - PTR_FAIL_IF(jump == NULL); - - PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0)); - return jump; + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_jump(compiler, type); } -#endif - stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); - PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + stack_size = call_get_stack_size(compiler, arg_types, &word_arg_count); + PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count)); + SLJIT_SKIP_CHECKS(compiler); jump = sljit_emit_jump(compiler, type); PTR_FAIL_IF(jump == NULL); @@ 
-1268,14 +1037,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi { sljit_sw stack_size = 0; sljit_s32 word_arg_count; -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - sljit_s32 swap_args; -#endif CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); if (type & SLJIT_CALL_RETURN) { + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 1)); + + if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) { + ADJUST_LOCAL_OFFSET(src, srcw); + CHECK_EXTRA_REGS(src, srcw, (void)0); + + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); + } + stack_size = type; FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw)); @@ -1284,10 +1068,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi srcw = 0; } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); if (stack_size == 0) return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); @@ -1296,51 +1077,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return emit_tail_call_end(compiler, stack_size); } -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3); + if ((type & 0xff) == SLJIT_CALL_REG_ARG) { + FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 0)); - if ((type & 0xff) == SLJIT_CALL) { - stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count); - swap_args = 0; - - if (word_arg_count > 0) { - if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) { - swap_args = 1; - if (((src & 
REG_MASK) | 0x2) == SLJIT_R2) - src ^= 0x2; - if ((OFFS_REG(src) | 0x2) == SLJIT_R2) - src ^= TO_OFFS_REG(0x2); - } - } - - FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args)); - - compiler->scratches_offset += stack_size; - compiler->locals_offset += stack_size; - -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); - - compiler->scratches_offset -= stack_size; - compiler->locals_offset -= stack_size; - - return post_call_with_args(compiler, arg_types, 0); + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, type, src, srcw); } -#endif - stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count); - FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count)); + stack_size = call_get_stack_size(compiler, arg_types, &word_arg_count); + FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count)); compiler->scratches_offset += stack_size; compiler->locals_offset += stack_size; -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw)); compiler->scratches_offset -= stack_size; diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index f37df6e..fe65d23 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -101,34 +101,38 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw /* Calculate size of b. */ inst_size += 1; /* mod r/m byte. 
*/ if (b & SLJIT_MEM) { - if (!(b & OFFS_REG_MASK)) { - if (NOT_HALFWORD(immb)) { - PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); - immb = 0; - if (b & REG_MASK) - b |= TO_OFFS_REG(TMP_REG2); - else - b |= TMP_REG2; - } - else if (reg_lmap[b & REG_MASK] == 4) - b |= TO_OFFS_REG(SLJIT_SP); + if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG2); + else + b |= TMP_REG2; } if (!(b & REG_MASK)) inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */ else { - if (reg_map[b & REG_MASK] >= 8) - rex |= REX_B; - - if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { + if (immb != 0 && !(b & OFFS_REG_MASK)) { /* Immediate operand. */ if (immb <= 127 && immb >= -128) inst_size += sizeof(sljit_s8); else inst_size += sizeof(sljit_s32); } - else if (reg_lmap[b & REG_MASK] == 5) - inst_size += sizeof(sljit_s8); + else if (reg_lmap[b & REG_MASK] == 5) { + /* Swap registers if possible. */ + if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5) + b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); + else + inst_size += sizeof(sljit_s8); + } + + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); if (b & OFFS_REG_MASK) { inst_size += 1; /* SIB byte. */ @@ -155,7 +159,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw else if (flags & EX86_SHIFT_INS) { imma &= compiler->mode32 ? 
0x1f : 0x3f; if (imma != 1) { - inst_size ++; + inst_size++; flags |= EX86_BYTE_ARG; } } else if (flags & EX86_BYTE_ARG) @@ -223,7 +227,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } else if (b & REG_MASK) { reg_lmap_b = reg_lmap[b & REG_MASK]; - if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) { + if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { if (immb != 0 || reg_lmap_b == 5) { if (immb <= 127 && immb >= -128) *buf_ptr |= 0x40; @@ -248,8 +252,14 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw } } else { + if (reg_lmap_b == 5) + *buf_ptr |= 0x40; + *buf_ptr++ |= 0x04; *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + + if (reg_lmap_b == 5) + *buf_ptr++ = 0; } } else { @@ -366,7 +376,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi { sljit_uw size; sljit_s32 word_arg_count = 0; - sljit_s32 saved_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_s32 saved_regs_size, tmp, i; #ifdef _WIN64 sljit_s32 saved_float_regs_size; @@ -379,16 +389,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); + if (options & SLJIT_ENTER_REG_ARG) + arg_types = 0; + /* Emit ENDBR64 at function entry if needed. */ FAIL_IF(emit_endbranch(compiler)); compiler->mode32 = 0; /* Including the return address saved by the call instruction. 
*/ - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1); tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { size = reg_map[i] >= 8 ? 2 : 1; inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -561,7 +574,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp #endif /* _WIN64 */ /* Including the return address saved by the call instruction. */ - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1); compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size; return SLJIT_SUCCESS; } @@ -633,8 +646,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) POP_REG(reg_lmap[i]); } - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; - for (i = tmp; i <= SLJIT_S0; i++) { + tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); + for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) { size = reg_map[i] >= 8 ? 
2 : 1; inst = (sljit_u8*)ensure_buf(compiler, 1 + size); FAIL_IF(!inst); @@ -786,17 +799,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile compiler->mode32 = 0; - PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL)); if (type & SLJIT_CALL_RETURN) { PTR_FAIL_IF(emit_stack_frame_release(compiler)); type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } @@ -822,16 +833,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi } FAIL_IF(emit_stack_frame_release(compiler)); - type = SLJIT_JUMP; } - FAIL_IF(call_with_args(compiler, arg_types, &src)); + if ((type & 0xff) != SLJIT_CALL_REG_ARG) + FAIL_IF(call_with_args(compiler, arg_types, &src)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + if (type & SLJIT_CALL_RETURN) + type = SLJIT_JUMP; + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); } diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index c7dd9be..e7d230d 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -26,11 +26,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { -#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - return "x86" SLJIT_CPUINFO " ABI:fastcall"; -#else return "x86" SLJIT_CPUINFO; -#endif } /* @@ -379,29 +375,41 @@ static sljit_u8 get_jump_code(sljit_uw type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_EQUAL_F64: + case SLJIT_F_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_EQUAL: /* Not supported. 
*/ return 0x84 /* je */; case SLJIT_NOT_EQUAL: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_F_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */ return 0x85 /* jne */; case SLJIT_LESS: case SLJIT_CARRY: - case SLJIT_LESS_F64: + case SLJIT_F_LESS: + case SLJIT_UNORDERED_OR_LESS: + case SLJIT_UNORDERED_OR_GREATER: return 0x82 /* jc */; case SLJIT_GREATER_EQUAL: case SLJIT_NOT_CARRY: - case SLJIT_GREATER_EQUAL_F64: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return 0x83 /* jae */; case SLJIT_GREATER: - case SLJIT_GREATER_F64: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_LESS: + case SLJIT_ORDERED_GREATER: return 0x87 /* jnbe */; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: return 0x86 /* jbe */; case SLJIT_SIG_LESS: @@ -422,10 +430,10 @@ static sljit_u8 get_jump_code(sljit_uw type) case SLJIT_NOT_OVERFLOW: return 0x81 /* jno */; - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return 0x8a /* jp */; - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return 0x8b /* jpo */; } return 0; @@ -682,6 +690,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) } } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL) + return 0; + + switch (type) { + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + return 0; + } + + return 1; +} + /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ @@ -2312,10 +2334,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); if (opcode != SLJIT_SUB && opcode != SLJIT_AND) { -#if (defined 
SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } @@ -2516,6 +2535,19 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + switch (GET_FLAG_TYPE(op)) { + case SLJIT_ORDERED_LESS: + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + case SLJIT_UNORDERED_OR_GREATER: + case SLJIT_ORDERED_LESS_EQUAL: + if (!FAST_IS_REG(src2)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w)); + src2 = TMP_FREG; + } + + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w); + } + if (!FAST_IS_REG(src1)) { FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w)); src1 = TMP_FREG; @@ -2769,7 +2801,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); - type &= 0xff; /* setcc = jcc + 0x10. 
*/ cond_set = U8(get_jump_code((sljit_uw)type) + 0x10); @@ -2813,10 +2844,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); #else @@ -2927,10 +2955,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co if (GET_OPCODE(op) < SLJIT_ADD) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); #endif /* SLJIT_CONFIG_X86_64 */ } @@ -2971,7 +2996,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; - *inst = U8(get_jump_code(type & 0xff) - 0x40); + *inst = U8(get_jump_code((sljit_uw)type) - 0x40); return SLJIT_SUCCESS; } diff --git a/src/sljit/sljitWXExecAllocator.c b/src/sljit/sljitWXExecAllocator.c index 72d5b8d..6893813 100644 --- a/src/sljit/sljitWXExecAllocator.c +++ b/src/sljit/sljitWXExecAllocator.c @@ -59,38 +59,15 @@ #include #ifdef __NetBSD__ -#if defined(PROT_MPROTECT) -#define check_se_protected(ptr, size) (0) #define SLJIT_PROT_WX PROT_MPROTECT(PROT_EXEC) -#else /* !PROT_MPROTECT */ -#ifdef _NETBSD_SOURCE -#include -#else /* !_NETBSD_SOURCE */ -typedef unsigned int u_int; -#define devmajor_t sljit_s32 -#endif /* _NETBSD_SOURCE */ -#include -#include - -#define check_se_protected(ptr, size) netbsd_se_protected() - -static SLJIT_INLINE int netbsd_se_protected(void) -{ - int mib[3]; - 
int paxflags; - size_t len = sizeof(paxflags); - - mib[0] = CTL_PROC; - mib[1] = getpid(); - mib[2] = PROC_PID_PAXFLAGS; - - if (SLJIT_UNLIKELY(sysctl(mib, 3, &paxflags, &len, NULL, 0) < 0)) - return -1; - - return (paxflags & CTL_PROC_PAXFLAGS_MPROTECT) ? -1 : 0; -} -#endif /* PROT_MPROTECT */ +#define check_se_protected(ptr, size) (0) #else /* POSIX */ +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#include +#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock) +#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock) +#endif /* !SLJIT_SINGLE_THREADED */ + #define check_se_protected(ptr, size) generic_se_protected(ptr, size) static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size) @@ -102,22 +79,20 @@ static SLJIT_INLINE int generic_se_protected(void *ptr, sljit_uw size) } #endif /* NetBSD */ -#if defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED +#ifndef SLJIT_SE_LOCK #define SLJIT_SE_LOCK() +#endif +#ifndef SLJIT_SE_UNLOCK #define SLJIT_SE_UNLOCK() -#else /* !SLJIT_SINGLE_THREADED */ -#include -#define SLJIT_SE_LOCK() pthread_mutex_lock(&se_lock) -#define SLJIT_SE_UNLOCK() pthread_mutex_unlock(&se_lock) -#endif /* SLJIT_SINGLE_THREADED */ - +#endif #ifndef SLJIT_PROT_WX #define SLJIT_PROT_WX 0 -#endif /* !SLJIT_PROT_WX */ +#endif SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) { -#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) +#if !(defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) \ + && !defined(__NetBSD__) static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; #endif static int se_protected = !SLJIT_PROT_WX;