JIT compiler update.
This commit is contained in:
parent
b0fb99a952
commit
6f6d44305a
|
@ -147,17 +147,23 @@
|
|||
#define SLJIT_CONFIG_UNSUPPORTED 1
|
||||
#endif
|
||||
|
||||
#else /* !_WIN32 */
|
||||
#else /* _WIN32 */
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define SLJIT_CONFIG_X86_64 1
|
||||
#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
|
||||
#define SLJIT_CONFIG_ARM_THUMB2 1
|
||||
#elif (defined(_M_ARM) && _M_ARM >= 7)
|
||||
#define SLJIT_CONFIG_ARM_V7 1
|
||||
#elif defined(_ARM_)
|
||||
#define SLJIT_CONFIG_ARM_V5 1
|
||||
#elif defined(_M_ARM64) || defined(__aarch64__)
|
||||
#define SLJIT_CONFIG_ARM_64 1
|
||||
#else
|
||||
#define SLJIT_CONFIG_X86_32 1
|
||||
#endif
|
||||
|
||||
#endif /* !WIN32 */
|
||||
#endif /* !_WIN32 */
|
||||
#endif /* SLJIT_CONFIG_AUTO */
|
||||
|
||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
|
@ -324,6 +330,11 @@
|
|||
sparc_cache_flush((from), (to))
|
||||
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
|
||||
|
||||
#elif defined _WIN32
|
||||
|
||||
#define SLJIT_CACHE_FLUSH(from, to) \
|
||||
FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
|
||||
|
||||
#else
|
||||
|
||||
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
|
||||
|
@ -371,12 +382,18 @@ typedef int sljit_sw;
|
|||
#define SLJIT_64BIT_ARCHITECTURE 1
|
||||
#define SLJIT_WORD_SHIFT 3
|
||||
#ifdef _WIN32
|
||||
#ifdef __GNUC__
|
||||
/* These types do not require windows.h */
|
||||
typedef unsigned long long sljit_uw;
|
||||
typedef long long sljit_sw;
|
||||
#else
|
||||
typedef unsigned __int64 sljit_uw;
|
||||
typedef __int64 sljit_sw;
|
||||
#else
|
||||
#endif
|
||||
#else /* !_WIN32 */
|
||||
typedef unsigned long int sljit_uw;
|
||||
typedef long int sljit_sw;
|
||||
#endif
|
||||
#endif /* _WIN32 */
|
||||
#endif
|
||||
|
||||
typedef sljit_uw sljit_p;
|
||||
|
@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
|
|||
|
||||
#define SLJIT_NUMBER_OF_REGISTERS 26
|
||||
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
|
||||
#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
|
||||
#define SLJIT_LOCALS_OFFSET_BASE 0
|
||||
|
||||
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
|
||||
|
||||
|
|
|
@ -26,6 +26,13 @@
|
|||
|
||||
#include "sljitLir.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
|
||||
#include <windows.h>
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
|
||||
|
||||
/* These libraries are needed for the macros below. */
|
||||
|
@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
|||
|
||||
#endif
|
||||
|
||||
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
|
||||
#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
|
||||
&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
|
||||
{
|
||||
|
|
|
@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
|
|||
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
|
||||
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
|
||||
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
|
||||
#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
|
||||
#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
|
||||
|
||||
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
|
||||
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
|
||||
|
||||
/* r18 - platform register, currently not used */
|
||||
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
|
||||
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
|
||||
31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
|
||||
};
|
||||
|
||||
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
||||
|
@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
|
||||
#define ADC 0x9a000000
|
||||
#define ADD 0x8b000000
|
||||
#define ADDE 0x8b200000
|
||||
#define ADDI 0x91000000
|
||||
#define AND 0x8a000000
|
||||
#define ANDI 0x92000000
|
||||
|
@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define FSUB 0x1e603800
|
||||
#define LDRI 0xf9400000
|
||||
#define LDP 0xa9400000
|
||||
#define LDP_PST 0xa8c00000
|
||||
#define LDP_PRE 0xa9c00000
|
||||
#define LDR_PRE 0xf8400c00
|
||||
#define LSLV 0x9ac02000
|
||||
#define LSRV 0x9ac02400
|
||||
#define MADD 0x9b000000
|
||||
|
@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
|
||||
local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size;
|
||||
|
||||
if (local_size <= (63 * sizeof(sljit_sw))) {
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
|
||||
offs = (local_size - saved_regs_size) << (15 - 3);
|
||||
} else {
|
||||
offs = 0 << 15;
|
||||
if (saved_regs_size & 0x8) {
|
||||
offs = 1 << 15;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
}
|
||||
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
if (saved_regs_size > 0)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
|
||||
}
|
||||
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size + saved_regs_size;
|
||||
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
|
||||
| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 4096)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
|
||||
else if (local_size > 256)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
#endif
|
||||
|
||||
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
|
||||
prev = -1;
|
||||
offs = 2 << 15;
|
||||
for (i = SLJIT_S0; i >= tmp; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(prev == -1);
|
||||
if (prev != -1)
|
||||
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
|
||||
|
||||
if (compiler->local_size > (63 * sizeof(sljit_sw))) {
|
||||
/* The local_size is already adjusted by the saved registers. */
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
|
||||
|
||||
args = get_arg_count(arg_types);
|
||||
|
||||
|
@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
if (args >= 3)
|
||||
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 4096) {
|
||||
if (local_size < 4 * 4096) {
|
||||
/* No need for a loop. */
|
||||
if (local_size >= 2 * 4096) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
if (local_size >= 2 * 4096) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
local_size -= 4096;
|
||||
}
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
|
||||
FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
|
||||
FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
|
||||
if (local_size > 256) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
|
||||
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
|
||||
}
|
||||
else if (local_size > 256) {
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
|
||||
|
||||
#else /* !_WIN32 */
|
||||
|
||||
/* The local_size does not include saved registers size. */
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size != 0)
|
||||
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
|
||||
#endif /* _WIN32 */
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
|
|||
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
|
||||
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
|
||||
{
|
||||
sljit_s32 saved_regs_size;
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
|
||||
local_size = (local_size + 15) & ~0xf;
|
||||
compiler->local_size = local_size;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
|
||||
compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
|
|||
|
||||
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
|
||||
|
||||
local_size = compiler->local_size;
|
||||
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
|
||||
if (local_size <= (63 * sizeof(sljit_sw)))
|
||||
offs = (local_size - saved_regs_size) << (15 - 3);
|
||||
else {
|
||||
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
|
||||
offs = 0 << 15;
|
||||
if (saved_regs_size & 0x8) {
|
||||
offs = 1 << 15;
|
||||
saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
|
||||
if (saved_regs_size & 0x8)
|
||||
saved_regs_size += sizeof(sljit_sw);
|
||||
|
||||
local_size = compiler->local_size - saved_regs_size;
|
||||
|
||||
/* Load LR as early as possible. */
|
||||
if (local_size == 0)
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
|
||||
else if (local_size < 63 * sizeof(sljit_sw)) {
|
||||
FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
|
||||
| RN(SLJIT_SP) | (local_size << (15 - 3))));
|
||||
}
|
||||
local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
|
||||
else {
|
||||
if (local_size > 0xfff) {
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
|
||||
local_size &= 0xfff;
|
||||
}
|
||||
if (local_size)
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
|
||||
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
|
||||
}
|
||||
|
||||
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
|
||||
prev = -1;
|
||||
offs = 2 << 15;
|
||||
for (i = SLJIT_S0; i >= tmp; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
|
||||
if (prev == -1) {
|
||||
if (!(offs & (1 << 15))) {
|
||||
prev = i;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
|
||||
offs += 1 << 15;
|
||||
continue;
|
||||
}
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
|
||||
FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
|
||||
offs += 2 << 15;
|
||||
prev = -1;
|
||||
}
|
||||
|
||||
SLJIT_ASSERT(prev == -1);
|
||||
if (prev != -1)
|
||||
FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
|
||||
|
||||
if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
|
||||
FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
|
||||
| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
|
||||
} else if (saved_regs_size > 0) {
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
|
||||
return SLJIT_SUCCESS;
|
||||
/* These two can be executed in parallel. */
|
||||
FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
|
||||
return push_inst(compiler, RET | RN(TMP_LR));
|
||||
}
|
||||
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
|
|||
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
|
||||
}
|
||||
|
||||
/* Emits code that computes (SLJIT_SP + offset) and delivers it to dst.
   dst / dstw: destination operand; when dst has SLJIT_MEM set, the value is
   built in TMP_REG1 and then stored through emit_op_mem.
   offset: signed displacement combined with SLJIT_SP.
   Returns SLJIT_SUCCESS, or the result of emit_op_mem for a memory dst.
   NOTE(review): FAIL_IF presumably returns early with an error code when
   instruction emission fails - confirm against its definition. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
|
||||
{
|
||||
sljit_s32 dst_reg;
|
||||
sljit_ins ins;
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
|
||||
|
||||
/* offset is applied to SLJIT_SP as-is below, with no base adjustment,
   so this code relies on the locals base offset being zero. */
SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
|
||||
|
||||
/* Compute directly into dst when FAST_IS_REG(dst) holds; otherwise use
   TMP_REG1 as a scratch register and store it at the end. */
dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
|
||||
|
||||
/* Magnitudes that fit in 24 bits are handled with at most two
   immediate instructions (high 12 bits, then low 12 bits). */
if (offset <= 0xffffff && offset >= -0xffffff) {
|
||||
ins = ADDI;
|
||||
/* For a negative offset, work with its magnitude and switch to SUBI
   (presumably the subtract-immediate opcode; it is defined outside this view). */
if (offset < 0) {
|
||||
offset = -offset;
|
||||
ins = SUBI;
|
||||
}
|
||||
|
||||
/* A value up to 0xfff fits the immediate field, which starts at bit 10. */
if (offset <= 0xfff)
|
||||
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
|
||||
else {
|
||||
/* High part first: bits 12..23 of offset are moved down by 2 so they land
   in the immediate field at bits 10..21. Bit 22 is set as well; in the
   A64 add/sub-immediate encoding it is the "shift immediate left by 12"
   flag - TODO confirm against the Arm ARM. */
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
|
||||
|
||||
/* Low 12 bits, if any, are applied on top of the partial result in dst_reg. */
offset &= 0xfff;
|
||||
if (offset != 0)
|
||||
FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Offset too large for the immediate forms: load it into dst_reg first,
   then combine it with SLJIT_SP in a single register-register add. */
FAIL_IF(load_immediate (compiler, dst_reg, offset));
|
||||
/* Add extended register form. */
|
||||
FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
|
||||
}
|
||||
|
||||
/* Memory destination: store the computed value from dst_reg into dst/dstw. */
if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
|
||||
return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
|
||||
{
|
||||
struct sljit_const *const_;
|
||||
|
|
|
@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define ASRSI 0x1000
|
||||
#define ASR_W 0xfa40f000
|
||||
#define ASR_WI 0xea4f0020
|
||||
#define BCC 0xd000
|
||||
#define BICI 0xf0200000
|
||||
#define BKPT 0xbe00
|
||||
#define BLX 0x4780
|
||||
|
@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define EORS 0x4040
|
||||
#define EOR_W 0xea800000
|
||||
#define IT 0xbf00
|
||||
#define LDRI 0xf8500800
|
||||
#define LSLS 0x4080
|
||||
#define LSLSI 0x0000
|
||||
#define LSL_W 0xfa00f000
|
||||
|
@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define SBCI 0xf1600000
|
||||
#define SBCS 0x4180
|
||||
#define SBC_W 0xeb600000
|
||||
#define SDIV 0xfb90f0f0
|
||||
#define SMULL 0xfb800000
|
||||
#define STR_SP 0x9000
|
||||
#define SUBS 0x1a00
|
||||
|
@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
|
|||
#define SXTH 0xb200
|
||||
#define SXTH_W 0xfa0ff080
|
||||
#define TST 0x4200
|
||||
#define UDIV 0xfbb0f0f0
|
||||
#define UMULL 0xfba00000
|
||||
#define UXTB 0xb2c0
|
||||
#define UXTB_W 0xfa5ff080
|
||||
|
@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
|
|||
|
||||
/* Really complex instruction form for branches. */
|
||||
s = (diff >> 23) & 0x1;
|
||||
j1 = (~(diff >> 21) ^ s) & 0x1;
|
||||
j2 = (~(diff >> 22) ^ s) & 0x1;
|
||||
j1 = (~(diff >> 22) ^ s) & 0x1;
|
||||
j2 = (~(diff >> 21) ^ s) & 0x1;
|
||||
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
|
||||
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
|
||||
|
||||
|
@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
|
|||
{
|
||||
sljit_uw tmp;
|
||||
|
||||
/* MOVS cannot be used since it destroys flags. */
|
||||
|
||||
if (imm >= 0x10000) {
|
||||
tmp = get_imm(imm);
|
||||
if (tmp != INVALID_IMM)
|
||||
|
@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
{
|
||||
sljit_s32 args, size, i, tmp;
|
||||
sljit_ins push = 0;
|
||||
#ifdef _WIN32
|
||||
sljit_uw imm;
|
||||
#endif
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
|
@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
|
||||
local_size = ((size + local_size + 7) & ~7) - size;
|
||||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 256) {
|
||||
if (local_size > 4096)
|
||||
imm = get_imm(4096);
|
||||
else
|
||||
imm = get_imm(local_size & ~0xff);
|
||||
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
|
||||
}
|
||||
#else
|
||||
if (local_size > 0) {
|
||||
if (local_size <= (127 << 2))
|
||||
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
|
||||
else
|
||||
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
|
||||
}
|
||||
#endif
|
||||
|
||||
args = get_arg_count(arg_types);
|
||||
|
||||
|
@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
if (args >= 3)
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size >= 256) {
|
||||
if (local_size > 4096) {
|
||||
imm = get_imm(4096);
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
|
||||
if (local_size < 4 * 4096) {
|
||||
if (local_size > 2 * 4096) {
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
if (local_size > 2 * 4096) {
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
local_size -= 4096;
|
||||
}
|
||||
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
local_size -= 4096;
|
||||
|
||||
SLJIT_ASSERT(local_size > 0);
|
||||
}
|
||||
else {
|
||||
FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
|
||||
FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
|
||||
FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
|
||||
|
||||
local_size &= 0xfff;
|
||||
|
||||
if (local_size != 0)
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
|
||||
}
|
||||
|
||||
if (local_size >= 256) {
|
||||
imm = get_imm(local_size & ~0xff);
|
||||
SLJIT_ASSERT(imm != INVALID_IMM);
|
||||
|
||||
FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
|
||||
}
|
||||
}
|
||||
|
||||
local_size &= 0xff;
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
|
||||
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
|
||||
}
|
||||
else if (local_size > 0)
|
||||
FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
|
||||
#endif
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
|
|||
/* Operators */
|
||||
/* --------------------------------------------------------------------- */
|
||||
|
||||
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#ifdef _WIN32
|
||||
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
|
||||
extern long long __rt_sdiv(int denominator, int numerator);
|
||||
#elif defined(__GNUC__)
|
||||
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
|
||||
extern int __aeabi_idivmod(int numerator, int denominator);
|
||||
#else
|
||||
|
@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
|
||||
|
||||
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
|
||||
{
|
||||
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
|
||||
sljit_sw saved_reg_list[3];
|
||||
sljit_sw saved_reg_count;
|
||||
#endif
|
||||
|
||||
CHECK_ERROR();
|
||||
CHECK(check_sljit_emit_op0(compiler, op));
|
||||
|
@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
| (reg_map[SLJIT_R0] << 12)
|
||||
| (reg_map[SLJIT_R0] << 16)
|
||||
| reg_map[SLJIT_R1]);
|
||||
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
|
||||
case SLJIT_DIVMOD_UW:
|
||||
case SLJIT_DIVMOD_SW:
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
|
||||
FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
|
||||
FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
|
||||
return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
|
||||
case SLJIT_DIV_UW:
|
||||
case SLJIT_DIV_SW:
|
||||
return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
|
||||
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
|
||||
case SLJIT_DIVMOD_UW:
|
||||
case SLJIT_DIVMOD_SW:
|
||||
case SLJIT_DIV_UW:
|
||||
|
@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#ifdef _WIN32
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
|
||||
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
|
||||
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
|
||||
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
|
||||
#elif defined(__GNUC__)
|
||||
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
|
||||
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
|
||||
#else
|
||||
|
@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
|
|||
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
|
||||
}
|
||||
return SLJIT_SUCCESS;
|
||||
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
|
||||
}
|
||||
|
||||
return SLJIT_SUCCESS;
|
||||
|
|
|
@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
|
||||
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
|
||||
if (args > 0) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
|
||||
inst += 2;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
|
||||
inst += 2;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
|
||||
*inst++ = 0x24;
|
||||
*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
|
||||
inst[2] = 0x24;
|
||||
inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
|
||||
}
|
||||
#else
|
||||
if (args > 0) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 2;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 2;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 3;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 3;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
|
||||
*inst++ = sizeof(sljit_sw) * 4;
|
||||
inst[0] = MOV_r_rm;
|
||||
inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
|
||||
inst[2] = sizeof(sljit_sw) * 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN32
|
||||
if (local_size > 1024) {
|
||||
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
|
||||
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
|
||||
#else
|
||||
/* Space for a single argument. This amount is excluded when the stack is allocated below. */
|
||||
local_size -= sizeof(sljit_sw);
|
||||
FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 4 * 4096) {
|
||||
if (local_size > 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
|
||||
if (local_size > 2 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
|
||||
if (local_size > 3 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
|
||||
}
|
||||
else {
|
||||
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
|
||||
EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
|
||||
|
||||
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
|
||||
#endif
|
||||
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
|
||||
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
|
||||
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
|
||||
FAIL_IF(!inst);
|
||||
|
||||
INC_SIZE(2);
|
||||
inst[0] = JNE_i8;
|
||||
inst[1] = (sljit_s8) -16;
|
||||
}
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
|
||||
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
|
||||
|
||||
compiler->mode32 = 0;
|
||||
|
||||
#ifdef _WIN64
|
||||
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
|
||||
if (fscratches >= 6 || fsaveds >= 1)
|
||||
|
@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
|
||||
#ifndef _WIN64
|
||||
if (args > 0) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = REX_W | REX_R;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
|
||||
inst[0] = REX_W | REX_R;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = REX_W | REX_R;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
|
||||
inst[0] = REX_W | REX_R;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
|
||||
}
|
||||
#else
|
||||
if (args > 0) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 1) {
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
|
||||
inst[0] = REX_W;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
|
||||
inst += 3;
|
||||
}
|
||||
if (args > 2) {
|
||||
*inst++ = REX_W | REX_B;
|
||||
*inst++ = MOV_r_rm;
|
||||
*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
|
||||
inst[0] = REX_W | REX_B;
|
||||
inst[1] = MOV_r_rm;
|
||||
inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
|
|||
compiler->local_size = local_size;
|
||||
|
||||
#ifdef _WIN64
|
||||
if (local_size > 1024) {
|
||||
/* Allocate stack for the callback, which grows the stack. */
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(4 + (3 + sizeof(sljit_s32)));
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_83;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
/* Allocated size for registers must be divisible by 8. */
|
||||
SLJIT_ASSERT(!(saved_register_size & 0x7));
|
||||
/* Aligned to 16 byte. */
|
||||
if (saved_register_size & 0x8) {
|
||||
*inst++ = 5 * sizeof(sljit_sw);
|
||||
local_size -= 5 * sizeof(sljit_sw);
|
||||
} else {
|
||||
*inst++ = 4 * sizeof(sljit_sw);
|
||||
local_size -= 4 * sizeof(sljit_sw);
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 4 * 4096) {
|
||||
if (local_size > 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
|
||||
if (local_size > 2 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
|
||||
if (local_size > 3 * 4096)
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
|
||||
}
|
||||
/* Second instruction */
|
||||
SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = MOV_rm_i32;
|
||||
*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
|
||||
sljit_unaligned_store_s32(inst, local_size);
|
||||
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
|
||||
|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
|
||||
compiler->skip_checks = 1;
|
||||
#endif
|
||||
FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
|
||||
else {
|
||||
EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
|
||||
|
||||
SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
|
||||
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
|
||||
FAIL_IF(!inst);
|
||||
|
||||
INC_SIZE(2);
|
||||
inst[0] = JNE_i8;
|
||||
inst[1] = (sljit_s8) -19;
|
||||
}
|
||||
|
||||
EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (local_size > 0) {
|
||||
if (local_size <= 127) {
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(4);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_83;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
*inst++ = local_size;
|
||||
}
|
||||
else {
|
||||
inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
|
||||
FAIL_IF(!inst);
|
||||
INC_SIZE(7);
|
||||
*inst++ = REX_W;
|
||||
*inst++ = GROUP_BINARY_81;
|
||||
*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
|
||||
sljit_unaligned_store_s32(inst, local_size);
|
||||
inst += sizeof(sljit_s32);
|
||||
}
|
||||
FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
|
||||
SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
|
||||
}
|
||||
|
||||
#ifdef _WIN64
|
||||
|
|
|
@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
|
|||
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
|
||||
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <malloc.h>
|
||||
|
||||
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
|
||||
{
|
||||
/* Workaround for calling the internal _chkstk() function on Windows.
|
||||
This function touches all 4k pages that belong to the requested stack space,
|
||||
whose size (in bytes) is passed in local_size. This is necessary on Windows,
|
||||
where the stack can only grow in 4k steps. The call merely burns
|
||||
CPU cycles when the stack is already large enough, but that cannot be known
|
||||
in advance, so the function must always be called. I think this is a bad
|
||||
design in general, even if there are some reasons for it. */
|
||||
*(volatile sljit_s32*)alloca(local_size) = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
|
||||
#include "sljitNativeX86_32.c"
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue