JIT compiler update.

This commit is contained in:
Zoltán Herczeg 2018-03-13 12:05:48 +00:00
parent b0fb99a952
commit 6f6d44305a
7 changed files with 405 additions and 210 deletions

View File

@ -147,17 +147,23 @@
#define SLJIT_CONFIG_UNSUPPORTED 1 #define SLJIT_CONFIG_UNSUPPORTED 1
#endif #endif
#else /* !_WIN32 */ #else /* _WIN32 */
#if defined(_M_X64) || defined(__x86_64__) #if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1 #define SLJIT_CONFIG_X86_64 1
#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
#define SLJIT_CONFIG_ARM_THUMB2 1
#elif (defined(_M_ARM) && _M_ARM >= 7)
#define SLJIT_CONFIG_ARM_V7 1
#elif defined(_ARM_) #elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1 #define SLJIT_CONFIG_ARM_V5 1
#elif defined(_M_ARM64) || defined(__aarch64__)
#define SLJIT_CONFIG_ARM_64 1
#else #else
#define SLJIT_CONFIG_X86_32 1 #define SLJIT_CONFIG_X86_32 1
#endif #endif
#endif /* !WIN32 */ #endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */ #endif /* SLJIT_CONFIG_AUTO */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@ -324,6 +330,11 @@
sparc_cache_flush((from), (to)) sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
#elif defined _WIN32
#define SLJIT_CACHE_FLUSH(from, to) \
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
#else #else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */ /* Calls __ARM_NR_cacheflush on ARM-Linux. */
@ -371,12 +382,18 @@ typedef int sljit_sw;
#define SLJIT_64BIT_ARCHITECTURE 1 #define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3 #define SLJIT_WORD_SHIFT 3
#ifdef _WIN32 #ifdef _WIN32
#ifdef __GNUC__
/* These types do not require windows.h */
typedef unsigned long long sljit_uw;
typedef long long sljit_sw;
#else
typedef unsigned __int64 sljit_uw; typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw; typedef __int64 sljit_sw;
#else #endif
#else /* !_WIN32 */
typedef unsigned long int sljit_uw; typedef unsigned long int sljit_uw;
typedef long int sljit_sw; typedef long int sljit_sw;
#endif #endif /* _WIN32 */
#endif #endif
typedef sljit_uw sljit_p; typedef sljit_uw sljit_p;
@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 26 #define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw)) #define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)

View File

@ -26,6 +26,13 @@
#include "sljitLir.h" #include "sljitLir.h"
#ifdef _WIN32
/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
#include <windows.h>
#endif /* _WIN32 */
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */ /* These libraries are needed for the macros below. */
@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
#endif #endif
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{ {

View File

@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4) #define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5) #define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* r18 - platform register, currently not used */ /* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
}; };
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ADC 0x9a000000 #define ADC 0x9a000000
#define ADD 0x8b000000 #define ADD 0x8b000000
#define ADDE 0x8b200000
#define ADDI 0x91000000 #define ADDI 0x91000000
#define AND 0x8a000000 #define AND 0x8a000000
#define ANDI 0x92000000 #define ANDI 0x92000000
@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800 #define FSUB 0x1e603800
#define LDRI 0xf9400000 #define LDRI 0xf9400000
#define LDP 0xa9400000 #define LDP 0xa9400000
#define LDP_PST 0xa8c00000 #define LDP_PRE 0xa9c00000
#define LDR_PRE 0xf8400c00
#define LSLV 0x9ac02000 #define LSLV 0x9ac02000
#define LSRV 0x9ac02400 #define LSRV 0x9ac02400
#define MADD 0x9b000000 #define MADD 0x9b000000
@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; if (saved_regs_size & 0x8)
local_size = (local_size + 15) & ~0xf;
compiler->local_size = local_size;
if (local_size <= (63 * sizeof(sljit_sw))) {
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
offs = (local_size - saved_regs_size) << (15 - 3);
} else {
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw); saved_regs_size += sizeof(sljit_sw);
}
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; local_size = (local_size + 15) & ~0xf;
if (saved_regs_size > 0) compiler->local_size = local_size + saved_regs_size;
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
} FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
#ifdef _WIN32
if (local_size >= 4096)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
else if (local_size > 256)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
#endif
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1; prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) { for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
SLJIT_ASSERT(prev == -1); if (prev != -1)
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size > (63 * sizeof(sljit_sw))) {
/* The local_size is already adjusted by the saved registers. */ FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
}
args = get_arg_count(arg_types); args = get_arg_count(arg_types);
@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3) if (args >= 3)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 4096) {
if (local_size < 4 * 4096) {
/* No need for a loop. */
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
if (local_size >= 2 * 4096) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
local_size -= 4096;
}
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size -= 4096;
}
else {
FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
local_size &= 0xfff;
}
if (local_size > 256) {
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 256) {
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
}
else if (local_size > 0)
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
#else /* !_WIN32 */
/* The local_size does not include saved registers size. */
if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size != 0)
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
#endif /* _WIN32 */
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{ {
sljit_s32 saved_regs_size;
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
local_size = (local_size + 15) & ~0xf; if (saved_regs_size & 0x8)
compiler->local_size = local_size; saved_regs_size += sizeof(sljit_sw);
compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
local_size = compiler->local_size; saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
if (saved_regs_size & 0x8)
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
if (local_size <= (63 * sizeof(sljit_sw)))
offs = (local_size - saved_regs_size) << (15 - 3);
else {
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
offs = 0 << 15;
if (saved_regs_size & 0x8) {
offs = 1 << 15;
saved_regs_size += sizeof(sljit_sw); saved_regs_size += sizeof(sljit_sw);
local_size = compiler->local_size - saved_regs_size;
/* Load LR as early as possible. */
if (local_size == 0)
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
else if (local_size < 63 * sizeof(sljit_sw)) {
FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
| RN(SLJIT_SP) | (local_size << (15 - 3))));
} }
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; else {
if (local_size > 0xfff) { if (local_size > 0xfff) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff; local_size &= 0xfff;
} }
if (local_size) if (local_size)
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
} }
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1; prev = -1;
offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) { for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) { if (prev == -1) {
if (!(offs & (1 << 15))) {
prev = i; prev = i;
continue; continue;
} }
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 1 << 15;
continue;
}
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
offs += 2 << 15; offs += 2 << 15;
prev = -1; prev = -1;
} }
SLJIT_ASSERT(prev == -1); if (prev != -1)
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
if (compiler->local_size <= (63 * sizeof(sljit_sw))) { /* These two can be executed in parallel. */
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); return push_inst(compiler, RET | RN(TMP_LR));
} else if (saved_regs_size > 0) {
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
}
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
return SLJIT_SUCCESS;
} }
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12)); return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
} }
/* Emits code that produces the address SLJIT_SP + offset and places it in dst.
   When dst is not a plain register the value is built in TMP_REG1 and then
   stored to the memory operand (dst, dstw) through emit_op_mem.

   Returns SLJIT_SUCCESS, or the result of emit_op_mem for memory destinations.
   NOTE(review): FAIL_IF / CHECK / CHECK_ERROR are defined elsewhere; they
   presumably return early with an error code - confirm. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	sljit_s32 target;
	sljit_ins opcode;

	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));

	/* The computation below adds the offset directly to SLJIT_SP,
	   so no extra base displacement may exist. */
	SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);

	target = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (offset > 0xffffff || offset < -0xffffff) {
		/* Magnitude does not fit in 24 bits: materialize the offset in
		   the target register, then use the add extended register form. */
		FAIL_IF(load_immediate (compiler, target, offset));
		FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(target) | RN(SLJIT_SP) | RM(target)));
	}
	else {
		/* Work with the magnitude; a negative offset selects SUBI. */
		opcode = ADDI;
		if (offset < 0) {
			opcode = SUBI;
			offset = -offset;
		}

		if (offset > 0xfff) {
			/* Bits 12..23 of the offset go first; bit 22 of the instruction is
			   set for this form (presumably the "shift by 12" flag - confirm). */
			FAIL_IF(push_inst(compiler, opcode | RD(target) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));

			/* Then the remaining low 12 bits, if any, on top of the partial result. */
			offset &= 0xfff;
			if (offset != 0) {
				FAIL_IF(push_inst(compiler, opcode | RD(target) | RN(target) | (offset << 10)));
			}
		}
		else {
			/* The whole magnitude fits in a single 12 bit immediate. */
			FAIL_IF(push_inst(compiler, opcode | RD(target) | RN(SLJIT_SP) | (offset << 10)));
		}
	}

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, WORD_SIZE | STORE, target, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{ {
struct sljit_const *const_; struct sljit_const *const_;

View File

@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ASRSI 0x1000 #define ASRSI 0x1000
#define ASR_W 0xfa40f000 #define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020 #define ASR_WI 0xea4f0020
#define BCC 0xd000
#define BICI 0xf0200000 #define BICI 0xf0200000
#define BKPT 0xbe00 #define BKPT 0xbe00
#define BLX 0x4780 #define BLX 0x4780
@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040 #define EORS 0x4040
#define EOR_W 0xea800000 #define EOR_W 0xea800000
#define IT 0xbf00 #define IT 0xbf00
#define LDRI 0xf8500800
#define LSLS 0x4080 #define LSLS 0x4080
#define LSLSI 0x0000 #define LSLSI 0x0000
#define LSL_W 0xfa00f000 #define LSL_W 0xfa00f000
@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBCI 0xf1600000 #define SBCI 0xf1600000
#define SBCS 0x4180 #define SBCS 0x4180
#define SBC_W 0xeb600000 #define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000 #define SMULL 0xfb800000
#define STR_SP 0x9000 #define STR_SP 0x9000
#define SUBS 0x1a00 #define SUBS 0x1a00
@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SXTH 0xb200 #define SXTH 0xb200
#define SXTH_W 0xfa0ff080 #define SXTH_W 0xfa0ff080
#define TST 0x4200 #define TST 0x4200
#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000 #define UMULL 0xfba00000
#define UXTB 0xb2c0 #define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080 #define UXTB_W 0xfa5ff080
@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
/* Really complex instruction form for branches. */ /* Really complex instruction form for branches. */
s = (diff >> 23) & 0x1; s = (diff >> 23) & 0x1;
j1 = (~(diff >> 21) ^ s) & 0x1; j1 = (~(diff >> 22) ^ s) & 0x1;
j2 = (~(diff >> 22) ^ s) & 0x1; j2 = (~(diff >> 21) ^ s) & 0x1;
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
{ {
sljit_uw tmp; sljit_uw tmp;
/* MOVS cannot be used since it destroys flags. */
if (imm >= 0x10000) { if (imm >= 0x10000) {
tmp = get_imm(imm); tmp = get_imm(imm);
if (tmp != INVALID_IMM) if (tmp != INVALID_IMM)
@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{ {
sljit_s32 args, size, i, tmp; sljit_s32 args, size, i, tmp;
sljit_ins push = 0; sljit_ins push = 0;
#ifdef _WIN32
sljit_uw imm;
#endif
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size; local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096)
imm = get_imm(4096);
else
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
}
#else
if (local_size > 0) { if (local_size > 0) {
if (local_size <= (127 << 2)) if (local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
else else
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
} }
#endif
args = get_arg_count(arg_types); args = get_arg_count(arg_types);
@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3) if (args >= 3)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
#ifdef _WIN32
if (local_size >= 256) {
if (local_size > 4096) {
imm = get_imm(4096);
SLJIT_ASSERT(imm != INVALID_IMM);
if (local_size < 4 * 4096) {
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
if (local_size > 2 * 4096) {
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
local_size -= 4096;
}
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
local_size -= 4096;
SLJIT_ASSERT(local_size > 0);
}
else {
FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
local_size &= 0xfff;
if (local_size != 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
}
if (local_size >= 256) {
imm = get_imm(local_size & ~0xff);
SLJIT_ASSERT(imm != INVALID_IMM);
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
}
}
local_size &= 0xff;
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
}
else if (local_size > 0)
FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
#endif
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
} }
@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */ /* Operators */
/* --------------------------------------------------------------------- */ /* --------------------------------------------------------------------- */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#if defined(__GNUC__) #ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator); extern int __aeabi_idivmod(int numerator, int denominator);
#else #else
@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
} }
#endif #endif
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{ {
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
sljit_sw saved_reg_list[3]; sljit_sw saved_reg_list[3];
sljit_sw saved_reg_count; sljit_sw saved_reg_count;
#endif
CHECK_ERROR(); CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op)); CHECK(check_sljit_emit_op0(compiler, op));
@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 16) | (reg_map[SLJIT_R0] << 16)
| reg_map[SLJIT_R1]); | reg_map[SLJIT_R1]);
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
case SLJIT_DIV_UW:
case SLJIT_DIV_SW:
return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW: case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW: case SLJIT_DIV_UW:
@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
} }
} }
#if defined(__GNUC__) #ifdef _WIN32
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
#elif defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else #else
@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
} }
return SLJIT_SUCCESS; return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
} }
return SLJIT_SUCCESS; return SLJIT_SUCCESS;

View File

@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args > 0) { if (args > 0) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
inst += 2;
} }
if (args > 1) { if (args > 1) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
inst += 2;
} }
if (args > 2) { if (args > 2) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
*inst++ = 0x24; inst[2] = 0x24;
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
} }
#else #else
if (args > 0) { if (args > 0) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 2; inst[2] = sizeof(sljit_sw) * 2;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 3; inst[2] = sizeof(sljit_sw) * 3;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = MOV_r_rm; inst[0] = MOV_r_rm;
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
*inst++ = sizeof(sljit_sw) * 4; inst[2] = sizeof(sljit_sw) * 4;
} }
#endif #endif
@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN32 #ifdef _WIN32
if (local_size > 1024) { if (local_size > 0) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (local_size <= 4 * 4096) {
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); if (local_size > 4096)
#else EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
/* Space for a single argument. This amount is excluded when the stack is allocated below. */ if (local_size > 2 * 4096)
local_size -= sizeof(sljit_sw); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); if (local_size > 3 * 4096)
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
else {
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw))); SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
#endif FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -16;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
} }
#endif #endif

View File

@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
compiler->mode32 = 0;
#ifdef _WIN64 #ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */ /* Two/four register slots for parameters plus space for xmm6 register if needed. */
if (fscratches >= 6 || fsaveds >= 1) if (fscratches >= 6 || fsaveds >= 1)
@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifndef _WIN64 #ifndef _WIN64
if (args > 0) { if (args > 0) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = REX_W | REX_R; inst[0] = REX_W | REX_R;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = REX_W | REX_R; inst[0] = REX_W | REX_R;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
} }
#else #else
if (args > 0) { if (args > 0) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
inst += 3;
} }
if (args > 1) { if (args > 1) {
*inst++ = REX_W; inst[0] = REX_W;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
inst += 3;
} }
if (args > 2) { if (args > 2) {
*inst++ = REX_W | REX_B; inst[0] = REX_W | REX_B;
*inst++ = MOV_r_rm; inst[1] = MOV_r_rm;
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
} }
#endif #endif
} }
@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size; compiler->local_size = local_size;
#ifdef _WIN64 #ifdef _WIN64
if (local_size > 1024) { if (local_size > 0) {
/* Allocate stack for the callback, which grows the stack. */ if (local_size <= 4 * 4096) {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32))); if (local_size > 4096)
FAIL_IF(!inst); EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
INC_SIZE(4 + (3 + sizeof(sljit_s32))); if (local_size > 2 * 4096)
*inst++ = REX_W; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
*inst++ = GROUP_BINARY_83; if (local_size > 3 * 4096)
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
/* Allocated size for registers must be divisible by 8. */
SLJIT_ASSERT(!(saved_register_size & 0x7));
/* Aligned to 16 byte. */
if (saved_register_size & 0x8) {
*inst++ = 5 * sizeof(sljit_sw);
local_size -= 5 * sizeof(sljit_sw);
} else {
*inst++ = 4 * sizeof(sljit_sw);
local_size -= 4 * sizeof(sljit_sw);
} }
/* Second instruction */ else {
SLJIT_ASSERT(reg_map[SLJIT_R0] < 8); EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
*inst++ = REX_W; EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
*inst++ = MOV_rm_i32;
*inst++ = MOD_REG | reg_lmap[SLJIT_R0]; SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
compiler->skip_checks = 1; SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
#endif FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
INC_SIZE(2);
inst[0] = JNE_i8;
inst[1] = (sljit_s8) -19;
}
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
} }
#endif #endif
if (local_size > 0) { if (local_size > 0) {
if (local_size <= 127) { FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
FAIL_IF(!inst);
INC_SIZE(4);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_83;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
*inst++ = local_size;
}
else {
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
FAIL_IF(!inst);
INC_SIZE(7);
*inst++ = REX_W;
*inst++ = GROUP_BINARY_81;
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
sljit_unaligned_store_s32(inst, local_size);
inst += sizeof(sljit_s32);
}
} }
#ifdef _WIN64 #ifdef _WIN64

View File

@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler, static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw); sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
#ifdef _WIN32
#include <malloc.h>
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
{
	/* Stand-in for calling the internal _chkstk() function on Windows.
	   That function touches every 4k page belonging to the requested
	   stack space, whose size is passed in local_size. This is required
	   on Windows, where the stack can only grow in 4k steps. If the stack
	   is already large enough the call merely burns CPU cycles, but this
	   cannot be known in advance, so it must always be performed. */
	volatile sljit_s32 *probe = (volatile sljit_s32*)alloca(local_size);

	/* Volatile store into the freshly allocated area. */
	*probe = 0;
}
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c" #include "sljitNativeX86_32.c"
#else #else