From 63d812ff0eaff56aabd7be3e1e71a7a399691c09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= Date: Tue, 30 Sep 2014 07:08:35 +0000 Subject: [PATCH] Land the JIT compiler (as a separate commit). --- src/sljit/sljitConfig.h | 126 + src/sljit/sljitConfigInternal.h | 693 ++ src/sljit/sljitExecAllocator.c | 312 + src/sljit/sljitLir.c | 2017 +++++ src/sljit/sljitLir.h | 1191 +++ src/sljit/sljitNativeARM_32.c | 2549 ++++++ src/sljit/sljitNativeARM_64.c | 2019 +++++ src/sljit/sljitNativeARM_T2_32.c | 2058 +++++ src/sljit/sljitNativeMIPS_32.c | 366 + src/sljit/sljitNativeMIPS_64.c | 469 ++ src/sljit/sljitNativeMIPS_common.c | 2135 +++++ src/sljit/sljitNativePPC_32.c | 269 + src/sljit/sljitNativePPC_64.c | 421 + src/sljit/sljitNativePPC_common.c | 2374 ++++++ src/sljit/sljitNativeSPARC_32.c | 164 + src/sljit/sljitNativeSPARC_common.c | 1430 ++++ src/sljit/sljitNativeTILEGX-encoder.c | 10159 ++++++++++++++++++++++++ src/sljit/sljitNativeTILEGX_64.c | 2574 ++++++ src/sljit/sljitNativeX86_32.c | 550 ++ src/sljit/sljitNativeX86_64.c | 747 ++ src/sljit/sljitNativeX86_common.c | 2925 +++++++ src/sljit/sljitUtils.c | 332 + 22 files changed, 35880 insertions(+) create mode 100644 src/sljit/sljitConfig.h create mode 100644 src/sljit/sljitConfigInternal.h create mode 100644 src/sljit/sljitExecAllocator.c create mode 100644 src/sljit/sljitLir.c create mode 100644 src/sljit/sljitLir.h create mode 100644 src/sljit/sljitNativeARM_32.c create mode 100644 src/sljit/sljitNativeARM_64.c create mode 100644 src/sljit/sljitNativeARM_T2_32.c create mode 100644 src/sljit/sljitNativeMIPS_32.c create mode 100644 src/sljit/sljitNativeMIPS_64.c create mode 100644 src/sljit/sljitNativeMIPS_common.c create mode 100644 src/sljit/sljitNativePPC_32.c create mode 100644 src/sljit/sljitNativePPC_64.c create mode 100644 src/sljit/sljitNativePPC_common.c create mode 100644 src/sljit/sljitNativeSPARC_32.c create mode 100644 src/sljit/sljitNativeSPARC_common.c create mode 100644 src/sljit/sljitNativeTILEGX-encoder.c create mode 100644 src/sljit/sljitNativeTILEGX_64.c create mode 100644 src/sljit/sljitNativeX86_32.c create mode 100644 src/sljit/sljitNativeX86_64.c create mode 100644 src/sljit/sljitNativeX86_common.c create mode 100644 src/sljit/sljitUtils.c diff --git a/src/sljit/sljitConfig.h b/src/sljit/sljitConfig.h new file mode 100644 index 0000000..10364c3 --- /dev/null +++ b/src/sljit/sljitConfig.h @@ -0,0 +1,126 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SLJIT_CONFIG_H_ +#define _SLJIT_CONFIG_H_ + +/* --------------------------------------------------------------------- */ +/* Custom defines */ +/* --------------------------------------------------------------------- */ + +/* Put your custom defines here. This empty section will never change + which helps maintaining patches (with diff / patch utilities). */ + +/* --------------------------------------------------------------------- */ +/* Architecture */ +/* --------------------------------------------------------------------- */ + +/* Architecture selection. */ +/* #define SLJIT_CONFIG_X86_32 1 */ +/* #define SLJIT_CONFIG_X86_64 1 */ +/* #define SLJIT_CONFIG_ARM_V5 1 */ +/* #define SLJIT_CONFIG_ARM_V7 1 */ +/* #define SLJIT_CONFIG_ARM_THUMB2 1 */ +/* #define SLJIT_CONFIG_ARM_64 1 */ +/* #define SLJIT_CONFIG_PPC_32 1 */ +/* #define SLJIT_CONFIG_PPC_64 1 */ +/* #define SLJIT_CONFIG_MIPS_32 1 */ +/* #define SLJIT_CONFIG_MIPS_64 1 */ +/* #define SLJIT_CONFIG_SPARC_32 1 */ +/* #define SLJIT_CONFIG_TILEGX 1 */ + +/* #define SLJIT_CONFIG_AUTO 1 */ +/* #define SLJIT_CONFIG_UNSUPPORTED 1 */ + +/* --------------------------------------------------------------------- */ +/* Utilities */ +/* --------------------------------------------------------------------- */ + +/* Useful for thread-safe compiling of global functions. */ +#ifndef SLJIT_UTIL_GLOBAL_LOCK +/* Enabled by default */ +#define SLJIT_UTIL_GLOBAL_LOCK 1 +#endif + +/* Implements a stack like data structure (by using mmap / VirtualAlloc). */ +#ifndef SLJIT_UTIL_STACK +/* Enabled by default */ +#define SLJIT_UTIL_STACK 1 +#endif + +/* Single threaded application. Does not require any locks. */ +#ifndef SLJIT_SINGLE_THREADED +/* Disabled by default. */ +#define SLJIT_SINGLE_THREADED 0 +#endif + +/* --------------------------------------------------------------------- */ +/* Configuration */ +/* --------------------------------------------------------------------- */ + +/* If SLJIT_STD_MACROS_DEFINED is not defined, the application should + define SLJIT_MALLOC, SLJIT_FREE, SLJIT_MEMMOVE, and NULL. */ +#ifndef SLJIT_STD_MACROS_DEFINED +/* Disabled by default. */ +#define SLJIT_STD_MACROS_DEFINED 0 +#endif + +/* Executable code allocation: + If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should + define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */ +#ifndef SLJIT_EXECUTABLE_ALLOCATOR +/* Enabled by default. */ +#define SLJIT_EXECUTABLE_ALLOCATOR 1 +#endif + +/* Return with error when an invalid argument is passed. */ +#ifndef SLJIT_ARGUMENT_CHECKS +/* Disabled by default */ +#define SLJIT_ARGUMENT_CHECKS 0 +#endif + +/* Debug checks (assertions, etc.). */ +#ifndef SLJIT_DEBUG +/* Enabled by default */ +#define SLJIT_DEBUG 1 +#endif + +/* Verbose operations. */ +#ifndef SLJIT_VERBOSE +/* Enabled by default */ +#define SLJIT_VERBOSE 1 +#endif + +/* + SLJIT_IS_FPU_AVAILABLE + The availability of the FPU can be controlled by SLJIT_IS_FPU_AVAILABLE. + zero value - FPU is NOT present. + nonzero value - FPU is present. +*/ + +/* For further configurations, see the beginning of sljitConfigInternal.h */ + +#endif diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h new file mode 100644 index 0000000..01a5329 --- /dev/null +++ b/src/sljit/sljitConfigInternal.h @@ -0,0 +1,693 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SLJIT_CONFIG_INTERNAL_H_ +#define _SLJIT_CONFIG_INTERNAL_H_ + +/* + SLJIT defines the following architecture dependent types and macros: + + Types: + sljit_sb, sljit_ub : signed and unsigned 8 bit byte + sljit_sh, sljit_uh : signed and unsigned 16 bit half-word (short) type + sljit_si, sljit_ui : signed and unsigned 32 bit integer type + sljit_sw, sljit_uw : signed and unsigned machine word, enough to store a pointer + sljit_p : unsgined pointer value (usually the same as sljit_uw, but + some 64 bit ABIs may use 32 bit pointers) + sljit_s : single precision floating point value + sljit_d : double precision floating point value + + Macros for feature detection (boolean): + SLJIT_32BIT_ARCHITECTURE : 32 bit architecture + SLJIT_64BIT_ARCHITECTURE : 64 bit architecture + SLJIT_LITTLE_ENDIAN : little endian architecture + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) + SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Constants: + SLJIT_NUMBER_OF_REGISTERS : number of available registers + SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers + SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers + SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers + SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers + SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers + SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index + SLJIT_DOUBLE_SHIFT : the shift required to apply when accessing + a double precision floating point array by index + SLJIT_SINGLE_SHIFT : the shift required to apply when accessing + a single precision floating point array by index + SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET) + SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address + + Other macros: + SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) +*/ + +/*****************/ +/* Sanity check. */ +/*****************/ + +#if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + || (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + || (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \ + || (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + || (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)) +#error "An architecture must be selected" +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + + (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + + (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) \ + + (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + + (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + + (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + + (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + + (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + + (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) \ + + (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + + (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + + (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + + (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) \ + + (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) >= 2 +#error "Multiple architectures are selected" +#endif + +/********************************************************/ +/* Automatic CPU detection (requires compiler support). */ +/********************************************************/ + +#if (defined SLJIT_CONFIG_AUTO && SLJIT_CONFIG_AUTO) + +#ifndef _WIN32 + +#if defined(__i386__) || defined(__i386) +#define SLJIT_CONFIG_X86_32 1 +#elif defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(__arm__) || defined(__ARM__) +#ifdef __thumb2__ +#define SLJIT_CONFIG_ARM_THUMB2 1 +#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) +#define SLJIT_CONFIG_ARM_V7 1 +#else +#define SLJIT_CONFIG_ARM_V5 1 +#endif +#elif defined (__aarch64__) +#define SLJIT_CONFIG_ARM_64 1 +#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__)) +#define SLJIT_CONFIG_PPC_64 1 +#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER) +#define SLJIT_CONFIG_PPC_32 1 +#elif defined(__mips__) && !defined(_LP64) +#define SLJIT_CONFIG_MIPS_32 1 +#elif defined(__mips64) +#define SLJIT_CONFIG_MIPS_64 1 +#elif defined(__sparc__) || defined(__sparc) +#define SLJIT_CONFIG_SPARC_32 1 +#elif defined(__tilegx__) +#define SLJIT_CONFIG_TILEGX 1 +#else +/* Unsupported architecture */ +#define SLJIT_CONFIG_UNSUPPORTED 1 +#endif + +#else /* !_WIN32 */ + +#if defined(_M_X64) || defined(__x86_64__) +#define SLJIT_CONFIG_X86_64 1 +#elif defined(_ARM_) +#define SLJIT_CONFIG_ARM_V5 1 +#else +#define SLJIT_CONFIG_X86_32 1 +#endif + +#endif /* !WIN32 */ +#endif /* SLJIT_CONFIG_AUTO */ + +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#undef SLJIT_EXECUTABLE_ALLOCATOR +#endif + +/******************************/ +/* CPU family type detection. */ +/******************************/ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +#define SLJIT_CONFIG_ARM_32 1 +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#define SLJIT_CONFIG_X86 1 +#elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +#define SLJIT_CONFIG_ARM 1 +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_CONFIG_PPC 1 +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +#define SLJIT_CONFIG_MIPS 1 +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) || (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) +#define SLJIT_CONFIG_SPARC 1 +#endif + +/**********************************/ +/* External function definitions. */ +/**********************************/ + +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. */ +#include +#include + +#endif /* SLJIT_STD_MACROS_DEFINED */ + +/* General macros: + Note: SLJIT is designed to be independent from them as possible. + + In release mode (SLJIT_DEBUG is not defined) only the following + external functions are needed: +*/ + +#ifndef SLJIT_MALLOC +#define SLJIT_MALLOC(size, allocator_data) malloc(size) +#endif + +#ifndef SLJIT_FREE +#define SLJIT_FREE(ptr, allocator_data) free(ptr) +#endif + +#ifndef SLJIT_MEMMOVE +#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#endif + +#ifndef SLJIT_ZEROMEM +#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) +#endif + +/***************************/ +/* Compiler helper macros. */ +/***************************/ + +#if !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define SLJIT_LIKELY(x) __builtin_expect((x), 1) +#define SLJIT_UNLIKELY(x) __builtin_expect((x), 0) +#else +#define SLJIT_LIKELY(x) (x) +#define SLJIT_UNLIKELY(x) (x) +#endif + +#endif /* !defined(SLJIT_LIKELY) && !defined(SLJIT_UNLIKELY) */ + +#ifndef SLJIT_INLINE +/* Inline functions. Some old compilers do not support them. */ +#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510 +#define SLJIT_INLINE +#else +#define SLJIT_INLINE __inline +#endif +#endif /* !SLJIT_INLINE */ + +#ifndef SLJIT_CONST +/* Const variables. */ +#define SLJIT_CONST const +#endif + +#ifndef SLJIT_UNUSED_ARG +/* Unused arguments. */ +#define SLJIT_UNUSED_ARG(arg) (void)arg +#endif + +/*********************************/ +/* Type of public API functions. */ +/*********************************/ + +#if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) +/* Static ABI functions. For all-in-one programs. */ + +#if defined(__GNUC__) +/* Disable unused warnings in gcc. */ +#define SLJIT_API_FUNC_ATTRIBUTE static __attribute__((unused)) +#else +#define SLJIT_API_FUNC_ATTRIBUTE static +#endif + +#else +#define SLJIT_API_FUNC_ATTRIBUTE +#endif /* (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC) */ + +/****************************/ +/* Instruction cache flush. */ +/****************************/ + +#ifndef SLJIT_CACHE_FLUSH + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* Not required to implement on archs with unified caches. */ +#define SLJIT_CACHE_FLUSH(from, to) + +#elif defined __APPLE__ + +/* Supported by all macs since Mac OS 10.5. + However, it does not work on non-jailbroken iOS devices, + although the compilation is successful. */ + +#define SLJIT_CACHE_FLUSH(from, to) \ + sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) + +#elif defined __ANDROID__ + +/* Android lacks __clear_cache; instead, cacheflush should be used. */ + +#define SLJIT_CACHE_FLUSH(from, to) \ + cacheflush((long)(from), (long)(to), 0) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + ppc_cache_flush((from), (to)) + +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + +/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + sparc_cache_flush((from), (to)) + +#else + +/* Calls __ARM_NR_cacheflush on ARM-Linux. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + __clear_cache((char*)(from), (char*)(to)) + +#endif + +#endif /* !SLJIT_CACHE_FLUSH */ + +/******************************************************/ +/* Byte/half/int/word/single/double type definitions. */ +/******************************************************/ + +/* 8 bit byte type. */ +typedef unsigned char sljit_ub; +typedef signed char sljit_sb; + +/* 16 bit half-word type. */ +typedef unsigned short int sljit_uh; +typedef signed short int sljit_sh; + +/* 32 bit integer type. */ +typedef unsigned int sljit_ui; +typedef signed int sljit_si; + +/* Machine word type. Enough for storing a pointer. + 32 bit for 32 bit machines. + 64 bit for 64 bit machines. */ +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. */ +#define SLJIT_WORD_SHIFT 0 +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) \ + && !(defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) \ + && !(defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) +#define SLJIT_32BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 2 +typedef unsigned int sljit_uw; +typedef int sljit_sw; +#else +#define SLJIT_64BIT_ARCHITECTURE 1 +#define SLJIT_WORD_SHIFT 3 +#ifdef _WIN32 +typedef unsigned __int64 sljit_uw; +typedef __int64 sljit_sw; +#else +typedef unsigned long int sljit_uw; +typedef long int sljit_sw; +#endif +#endif + +typedef sljit_uw sljit_p; + +/* Floating point types. */ +typedef float sljit_s; +typedef double sljit_d; + +/* Shift for pointer sized data. */ +#define SLJIT_POINTER_SHIFT SLJIT_WORD_SHIFT + +/* Shift for double precision sized data. */ +#define SLJIT_DOUBLE_SHIFT 3 +#define SLJIT_SINGLE_SHIFT 2 + +#ifndef SLJIT_W + +/* Defining long constants. */ +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_W(w) (w##ll) +#else +#define SLJIT_W(w) (w) +#endif + +#endif /* !SLJIT_W */ + +/*************************/ +/* Endianness detection. */ +/*************************/ + +#if !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) + +/* These macros are mostly useful for the applications. */ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + +#ifdef __LITTLE_ENDIAN__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#elif (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) \ + || (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + +#ifdef __MIPSEL__ +#define SLJIT_LITTLE_ENDIAN 1 +#else +#define SLJIT_BIG_ENDIAN 1 +#endif + +#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + +#define SLJIT_BIG_ENDIAN 1 + +#else +#define SLJIT_LITTLE_ENDIAN 1 +#endif + +#endif /* !defined(SLJIT_BIG_ENDIAN) && !defined(SLJIT_LITTLE_ENDIAN) */ + +/* Sanity check. */ +#if (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#if !(defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) && !(defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#error "Exactly one endianness must be selected" +#endif + +#ifndef SLJIT_UNALIGNED + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ + || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) \ + || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \ + || (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) \ + || (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \ + || (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_UNALIGNED 1 +#endif + +#endif /* !SLJIT_UNALIGNED */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +/* Auto detect SSE2 support using CPUID. + On 64 bit x86 cpus, sse2 must be present. */ +#define SLJIT_DETECT_SSE2 1 +#endif + +/*****************************************************************************************/ +/* Calling convention of functions generated by SLJIT or called from the generated code. */ +/*****************************************************************************************/ + +#ifndef SLJIT_CALL + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#if defined(__GNUC__) && !defined(__APPLE__) + +#define SLJIT_CALL __attribute__ ((fastcall)) +#define SLJIT_X86_32_FASTCALL 1 + +#elif defined(_MSC_VER) + +#define SLJIT_CALL __fastcall +#define SLJIT_X86_32_FASTCALL 1 + +#elif defined(__BORLANDC__) + +#define SLJIT_CALL __msfastcall +#define SLJIT_X86_32_FASTCALL 1 + +#else /* Unknown compiler. */ + +/* The cdecl attribute is the default. */ +#define SLJIT_CALL + +#endif + +#else /* Non x86-32 architectures. */ + +#define SLJIT_CALL + +#endif /* SLJIT_CONFIG_X86_32 */ + +#endif /* !SLJIT_CALL */ + +#ifndef SLJIT_INDIRECT_CALL +#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \ + || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) +/* It seems certain ppc compilers use an indirect addressing for functions + which makes things complicated. */ +#define SLJIT_INDIRECT_CALL 1 +#endif +#endif /* SLJIT_INDIRECT_CALL */ + +/* The offset which needs to be substracted from the return address to +determine the next executed instruction after return. */ +#ifndef SLJIT_RETURN_ADDRESS_OFFSET +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define SLJIT_RETURN_ADDRESS_OFFSET 8 +#else +#define SLJIT_RETURN_ADDRESS_OFFSET 0 +#endif +#endif /* SLJIT_RETURN_ADDRESS_OFFSET */ + +/***************************************************/ +/* Functions of the built-in executable allocator. */ +/***************************************************/ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) +#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) +#endif + +/**********************************************/ +/* Registers and locals offset determination. */ +/**********************************************/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +#define SLJIT_NUMBER_OF_REGISTERS 10 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) +#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw)) +#else +/* Maximum 3 arguments are passed on the stack, +1 for double alignment. */ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw)) +#endif /* SLJIT_X86_32_FASTCALL */ + +#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#ifndef _WIN64 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 +#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw)) +#else +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw)) +#endif /* _WIN64 */ + +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + +#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + +#define SLJIT_NUMBER_OF_REGISTERS 25 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10 +#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw)) + +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +#define SLJIT_NUMBER_OF_REGISTERS 22 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX) +#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw)) +#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +/* Add +1 for double alignment. */ +#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw)) +#endif /* SLJIT_CONFIG_PPC_64 || _AIX */ + +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +#define SLJIT_NUMBER_OF_REGISTERS 17 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) +#else +#define SLJIT_LOCALS_OFFSET_BASE 0 +#endif + +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) + +#define SLJIT_NUMBER_OF_REGISTERS 18 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 14 +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +/* Add +1 for double alignment. */ +#define SLJIT_LOCALS_OFFSET_BASE ((23 + 1) * sizeof(sljit_sw)) +#endif + +#elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define SLJIT_NUMBER_OF_REGISTERS 0 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0 +#define SLJIT_LOCALS_OFFSET_BASE 0 + +#endif + +#define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE) + +#define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \ + (SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS) + +#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64) +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1 +#else +#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0 +#endif + +#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \ + (SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS) + +/*************************************/ +/* Debug and verbose related macros. */ +/*************************************/ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +#include +#endif + +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + +#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP) + +/* SLJIT_HALT_PROCESS must halt the process. */ +#ifndef SLJIT_HALT_PROCESS +#include + +#define SLJIT_HALT_PROCESS() \ + abort(); +#endif /* !SLJIT_HALT_PROCESS */ + +#include + +#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */ + +/* Feel free to redefine these two macros. */ +#ifndef SLJIT_ASSERT + +#define SLJIT_ASSERT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) { \ + printf("Assertion failed at " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } \ + } while (0) + +#endif /* !SLJIT_ASSERT */ + +#ifndef SLJIT_ASSERT_STOP + +#define SLJIT_ASSERT_STOP() \ + do { \ + printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ + SLJIT_HALT_PROCESS(); \ + } while (0) + +#endif /* !SLJIT_ASSERT_STOP */ + +#else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +/* Forcing empty, but valid statements. */ +#undef SLJIT_ASSERT +#undef SLJIT_ASSERT_STOP + +#define SLJIT_ASSERT(x) \ + do { } while (0) +#define SLJIT_ASSERT_STOP() \ + do { } while (0) + +#endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ + +#ifndef SLJIT_COMPILE_ASSERT + +/* Should be improved eventually. */ +#define SLJIT_COMPILE_ASSERT(x, description) \ + SLJIT_ASSERT(x) + +#endif /* !SLJIT_COMPILE_ASSERT */ + +#endif diff --git a/src/sljit/sljitExecAllocator.c b/src/sljit/sljitExecAllocator.c new file mode 100644 index 0000000..f24ed33 --- /dev/null +++ b/src/sljit/sljitExecAllocator.c @@ -0,0 +1,312 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + This file contains a simple executable memory allocator + + It is assumed, that executable code blocks are usually medium (or sometimes + large) memory blocks, and the allocator is not too frequently called (less + optimized than other allocators). Thus, using it as a generic allocator is + not suggested. + + How does it work: + Memory is allocated in continuous memory areas called chunks by alloc_chunk() + Chunk format: + [ block ][ block ] ... [ block ][ block terminator ] + + All blocks and the block terminator is started with block_header. The block + header contains the size of the previous and the next block. These sizes + can also contain special values. + Block size: + 0 - The block is a free_block, with a different size member. + 1 - The block is a block terminator. + n - The block is used at the moment, and the value contains its size. + Previous block size: + 0 - This is the first block of the memory chunk. + n - The size of the previous block. + + Using these size values we can go forward or backward on the block chain. + The unused blocks are stored in a chain list pointed by free_blocks. This + list is useful if we need to find a suitable memory area when the allocator + is called. + + When a block is freed, the new free block is connected to its adjacent free + blocks if possible. + + [ free block ][ used block ][ free block ] + and "used block" is freed, the three blocks are connected together: + [ one big free block ] +*/ + +/* --------------------------------------------------------------------- */ +/* System (OS) functions */ +/* --------------------------------------------------------------------- */ + +/* 64 KByte. */ +#define CHUNK_SIZE 0x10000 + +/* + alloc_chunk / free_chunk : + * allocate executable system memory chunks + * the size is always divisible by CHUNK_SIZE + allocator_grab_lock / allocator_release_lock : + * make the allocator thread safe + * can be empty if the OS (or the application) does not support threading + * only the allocator requires this lock, sljit is fully thread safe + as it only uses local variables +*/ + +#ifdef _WIN32 + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); +} + +static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +{ + SLJIT_UNUSED_ARG(size); + VirtualFree(chunk, 0, MEM_RELEASE); +} + +#else + +static SLJIT_INLINE void* alloc_chunk(sljit_uw size) +{ + void* retval; + +#ifdef MAP_ANON + retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); +#else + if (dev_zero < 0) { + if (open_dev_zero()) + return NULL; + } + retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0); +#endif + + return (retval != MAP_FAILED) ? retval : NULL; +} + +static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +{ + munmap(chunk, size); +} + +#endif + +/* --------------------------------------------------------------------- */ +/* Common functions */ +/* --------------------------------------------------------------------- */ + +#define CHUNK_MASK (~(CHUNK_SIZE - 1)) + +struct block_header { + sljit_uw size; + sljit_uw prev_size; +}; + +struct free_block { + struct block_header header; + struct free_block *next; + struct free_block *prev; + sljit_uw size; +}; + +#define AS_BLOCK_HEADER(base, offset) \ + ((struct block_header*)(((sljit_ub*)base) + offset)) +#define AS_FREE_BLOCK(base, offset) \ + ((struct free_block*)(((sljit_ub*)base) + offset)) +#define MEM_START(base) ((void*)(((sljit_ub*)base) + sizeof(struct block_header))) +#define ALIGN_SIZE(size) (((size) + sizeof(struct block_header) + 7) & ~7) + +static struct free_block* free_blocks; +static sljit_uw allocated_size; +static sljit_uw total_size; + +static SLJIT_INLINE void sljit_insert_free_block(struct free_block *free_block, sljit_uw size) +{ + free_block->header.size = 0; + free_block->size = size; + + free_block->next = free_blocks; + free_block->prev = 0; + if (free_blocks) + free_blocks->prev = free_block; + free_blocks = free_block; +} + +static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) +{ + if (free_block->next) + free_block->next->prev = free_block->prev; + + if (free_block->prev) + free_block->prev->next = free_block->next; + else { + SLJIT_ASSERT(free_blocks == free_block); + free_blocks = free_block->next; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) +{ + struct block_header *header; + struct block_header *next_header; + struct free_block *free_block; + sljit_uw chunk_size; + + allocator_grab_lock(); + if (size < sizeof(struct free_block)) + size = sizeof(struct free_block); + size = ALIGN_SIZE(size); + + free_block = free_blocks; + while (free_block) { + if (free_block->size >= size) { + chunk_size = free_block->size; + if (chunk_size > size + 64) { + /* We just cut a block from the end of the free block. */ + chunk_size -= size; + free_block->size = chunk_size; + header = AS_BLOCK_HEADER(free_block, chunk_size); + header->prev_size = chunk_size; + AS_BLOCK_HEADER(header, size)->prev_size = size; + } + else { + sljit_remove_free_block(free_block); + header = (struct block_header*)free_block; + size = chunk_size; + } + allocated_size += size; + header->size = size; + allocator_release_lock(); + return MEM_START(header); + } + free_block = free_block->next; + } + + chunk_size = (size + sizeof(struct block_header) + CHUNK_SIZE - 1) & CHUNK_MASK; + header = (struct block_header*)alloc_chunk(chunk_size); + if (!header) { + allocator_release_lock(); + return NULL; + } + + chunk_size -= sizeof(struct block_header); + total_size += chunk_size; + + header->prev_size = 0; + if (chunk_size > size + 64) { + /* Cut the allocated space into a free and a used block. */ + allocated_size += size; + header->size = size; + chunk_size -= size; + + free_block = AS_FREE_BLOCK(header, size); + free_block->header.prev_size = size; + sljit_insert_free_block(free_block, chunk_size); + next_header = AS_BLOCK_HEADER(free_block, chunk_size); + } + else { + /* All space belongs to this allocation. */ + allocated_size += chunk_size; + header->size = chunk_size; + next_header = AS_BLOCK_HEADER(header, chunk_size); + } + next_header->size = 1; + next_header->prev_size = chunk_size; + allocator_release_lock(); + return MEM_START(header); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) +{ + struct block_header *header; + struct free_block* free_block; + + allocator_grab_lock(); + header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header)); + allocated_size -= header->size; + + /* Connecting free blocks together if possible. */ + + /* If header->prev_size == 0, free_block will equal to header. + In this case, free_block->header.size will be > 0. */ + free_block = AS_FREE_BLOCK(header, -(sljit_sw)header->prev_size); + if (SLJIT_UNLIKELY(!free_block->header.size)) { + free_block->size += header->size; + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + else { + free_block = (struct free_block*)header; + sljit_insert_free_block(free_block, header->size); + } + + header = AS_BLOCK_HEADER(free_block, free_block->size); + if (SLJIT_UNLIKELY(!header->size)) { + free_block->size += ((struct free_block*)header)->size; + sljit_remove_free_block((struct free_block*)header); + header = AS_BLOCK_HEADER(free_block, free_block->size); + header->prev_size = free_block->size; + } + + /* The whole chunk is free. */ + if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. */ + if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + } + + allocator_release_lock(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) +{ + struct free_block* free_block; + struct free_block* next_free_block; + + allocator_grab_lock(); + + free_block = free_blocks; + while (free_block) { + next_free_block = free_block->next; + if (!free_block->header.prev_size && + AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) { + total_size -= free_block->size; + sljit_remove_free_block(free_block); + free_chunk(free_block, free_block->size + sizeof(struct block_header)); + } + free_block = next_free_block; + } + + SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + allocator_release_lock(); +} diff --git a/src/sljit/sljitLir.c b/src/sljit/sljitLir.c new file mode 100644 index 0000000..f0b3947 --- /dev/null +++ b/src/sljit/sljitLir.c @@ -0,0 +1,2017 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sljitLir.h" + +#define CHECK_ERROR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return compiler->error; \ + } while (0) + +#define CHECK_ERROR_PTR() \ + do { \ + if (SLJIT_UNLIKELY(compiler->error)) \ + return NULL; \ + } while (0) + +#define FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return compiler->error; \ + } while (0) + +#define PTR_FAIL_IF(expr) \ + do { \ + if (SLJIT_UNLIKELY(expr)) \ + return NULL; \ + } while (0) + +#define FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return SLJIT_ERR_ALLOC_FAILED; \ + } \ + } while (0) + +#define PTR_FAIL_IF_NULL(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#define PTR_FAIL_WITH_EXEC_IF(ptr) \ + do { \ + if (SLJIT_UNLIKELY(!(ptr))) { \ + compiler->error = SLJIT_ERR_EX_ALLOC_FAILED; \ + return NULL; \ + } \ + } while (0) + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#define GET_OPCODE(op) \ + ((op) & ~(SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + +#define GET_FLAGS(op) \ + ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) + +#define GET_ALL_FLAGS(op) \ + ((op) & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + +#define TYPE_CAST_NEEDED(op) \ + (((op) >= SLJIT_MOV_UB && (op) <= SLJIT_MOV_SH) || ((op) >= SLJIT_MOVU_UB && (op) <= SLJIT_MOVU_SH)) + +#define BUF_SIZE 4096 + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define ABUF_SIZE 2048 +#else +#define ABUF_SIZE 4096 +#endif + +/* Parameter parsing. */ +#define REG_MASK 0x3f +#define OFFS_REG(reg) (((reg) >> 8) & REG_MASK) +#define OFFS_REG_MASK (REG_MASK << 8) +#define TO_OFFS_REG(reg) ((reg) << 8) +/* When reg cannot be unused. */ +#define FAST_IS_REG(reg) ((reg) <= REG_MASK) +/* When reg can be unused. */ +#define SLOW_IS_REG(reg) ((reg) > 0 && (reg) <= REG_MASK) + +/* Jump flags. */ +#define JUMP_LABEL 0x1 +#define JUMP_ADDR 0x2 +/* SLJIT_REWRITABLE_JUMP is 0x1000. */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# define PATCH_MB 0x4 +# define PATCH_MW 0x8 +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +# define PATCH_MD 0x10 +#endif +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# define IS_BL 0x4 +# define PATCH_B 0x8 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# define CPOOL_SIZE 512 +#endif + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# define IS_COND 0x04 +# define IS_BL 0x08 + /* conditional + imm8 */ +# define PATCH_TYPE1 0x10 + /* conditional + imm20 */ +# define PATCH_TYPE2 0x20 + /* IT + imm24 */ +# define PATCH_TYPE3 0x30 + /* imm11 */ +# define PATCH_TYPE4 0x40 + /* imm24 */ +# define PATCH_TYPE5 0x50 + /* BL + imm24 */ +# define PATCH_BL 0x60 + /* 0xf00 cc code for branches */ +#endif + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# define IS_COND 0x004 +# define IS_CBZ 0x008 +# define IS_BL 0x010 +# define PATCH_B 0x020 +# define PATCH_COND 0x040 +# define PATCH_ABS48 0x080 +# define PATCH_ABS64 0x100 +#endif + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# define IS_COND 0x004 +# define IS_CALL 0x008 +# define PATCH_B 0x010 +# define PATCH_ABS_B 0x020 +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +# define PATCH_ABS32 0x040 +# define PATCH_ABS48 0x080 +#endif +# define REMOVE_COND 0x100 +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# define IS_MOVABLE 0x004 +# define IS_JAL 0x008 +# define IS_CALL 0x010 +# define IS_BIT26_COND 0x020 +# define IS_BIT16_COND 0x040 + +# define IS_COND (IS_BIT26_COND | IS_BIT16_COND) + +# define PATCH_B 0x080 +# define PATCH_J 0x100 + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +# define PATCH_ABS32 0x200 +# define PATCH_ABS48 0x400 +#endif + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + /* FPU status register */ +# define FCSR_FCC 33 +#endif + +#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) +# define IS_JAL 0x04 +# define IS_COND 0x08 + +# define PATCH_B 0x10 +# define PATCH_J 0x20 +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +# define IS_MOVABLE 0x04 +# define IS_COND 0x08 +# define IS_CALL 0x10 + +# define PATCH_B 0x20 +# define PATCH_CALL 0x40 + + /* instruction types */ +# define MOVABLE_INS 0 + /* 1 - 31 last destination register */ + /* no destination (i.e: store) */ +# define UNMOVABLE_INS 32 + +# define DST_INS_MASK 0xff + + /* ICC_SET is the same as SET_FLAGS. */ +# define ICC_IS_SET (1 << 23) +# define FCC_IS_SET (1 << 24) +#endif + +/* Stack management. */ + +#define GET_SAVED_REGISTERS_SIZE(scratches, saveds, extra) \ + (((scratches < SLJIT_NUMBER_OF_SCRATCH_REGISTERS ? 0 : (scratches - SLJIT_NUMBER_OF_SCRATCH_REGISTERS)) + \ + (saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? saveds : SLJIT_NUMBER_OF_SAVED_REGISTERS) + \ + extra) * sizeof(sljit_sw)) + +#define ADJUST_LOCAL_OFFSET(p, i) \ + if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + (i) += SLJIT_LOCALS_OFFSET; + +#endif /* !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) */ + +/* Utils can still be used even if SLJIT_CONFIG_UNSUPPORTED is set. */ +#include "sljitUtils.c" + +#if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +#include "sljitExecAllocator.c" +#endif + +/* Argument checking features. */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + +/* Returns with error when an invalid argument is passed. */ + +#define CHECK_ARGUMENT(x) \ + do { \ + if (SLJIT_UNLIKELY(!(x))) \ + return 1; \ + } while (0) + +#define CHECK_RETURN_TYPE sljit_si +#define CHECK_RETURN_OK return 0 + +#define CHECK(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return SLJIT_ERR_BAD_ARGUMENT; \ + } \ + } while (0) + +#define CHECK_PTR(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + compiler->error = SLJIT_ERR_BAD_ARGUMENT; \ + return NULL; \ + } \ + } while (0) + +#define CHECK_REG_INDEX(x) \ + do { \ + if (SLJIT_UNLIKELY(x)) { \ + return -2; \ + } \ + } while (0) + +#elif (defined SLJIT_DEBUG && SLJIT_DEBUG) + +/* Assertion failure occures if an invalid argument is passed. */ +#undef SLJIT_ARGUMENT_CHECKS +#define SLJIT_ARGUMENT_CHECKS 1 + +#define CHECK_ARGUMENT(x) SLJIT_ASSERT(x) +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#elif (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +/* Arguments are not checked. */ +#define CHECK_RETURN_TYPE void +#define CHECK_RETURN_OK return +#define CHECK(x) x +#define CHECK_PTR(x) x +#define CHECK_REG_INDEX(x) x + +#else + +/* Arguments are not checked. */ +#define CHECK(x) +#define CHECK_PTR(x) +#define CHECK_REG_INDEX(x) + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +/* --------------------------------------------------------------------- */ +/* Public functions */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#define SLJIT_NEEDS_COMPILER_INIT 1 +static sljit_si compiler_initialized = 0; +/* A thread safe initialization. */ +static void init_compiler(void); +#endif + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) +{ + struct sljit_compiler *compiler = (struct sljit_compiler*)SLJIT_MALLOC(sizeof(struct sljit_compiler), allocator_data); + if (!compiler) + return NULL; + SLJIT_ZEROMEM(compiler, sizeof(struct sljit_compiler)); + + SLJIT_COMPILE_ASSERT( + sizeof(sljit_sb) == 1 && sizeof(sljit_ub) == 1 + && sizeof(sljit_sh) == 2 && sizeof(sljit_uh) == 2 + && sizeof(sljit_si) == 4 && sizeof(sljit_ui) == 4 + && (sizeof(sljit_p) == 4 || sizeof(sljit_p) == 8) + && sizeof(sljit_p) <= sizeof(sljit_sw) + && (sizeof(sljit_sw) == 4 || sizeof(sljit_sw) == 8) + && (sizeof(sljit_uw) == 4 || sizeof(sljit_uw) == 8), + invalid_integer_types); + SLJIT_COMPILE_ASSERT(SLJIT_INT_OP == SLJIT_SINGLE_OP, + int_op_and_single_op_must_be_the_same); + SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_SINGLE_OP, + rewritable_jump_and_single_op_must_not_be_the_same); + + /* Only the non-zero members must be set. */ + compiler->error = SLJIT_SUCCESS; + + compiler->allocator_data = allocator_data; + compiler->buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data); + compiler->abuf = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, allocator_data); + + if (!compiler->buf || !compiler->abuf) { + if (compiler->buf) + SLJIT_FREE(compiler->buf, allocator_data); + if (compiler->abuf) + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + + compiler->buf->next = NULL; + compiler->buf->used_size = 0; + compiler->abuf->next = NULL; + compiler->abuf->used_size = 0; + + compiler->scratches = -1; + compiler->saveds = -1; + compiler->fscratches = -1; + compiler->fsaveds = -1; + compiler->local_size = -1; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->args = -1; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + compiler->cpool = (sljit_uw*)SLJIT_MALLOC(CPOOL_SIZE * sizeof(sljit_uw) + + CPOOL_SIZE * sizeof(sljit_ub), allocator_data); + if (!compiler->cpool) { + SLJIT_FREE(compiler->buf, allocator_data); + SLJIT_FREE(compiler->abuf, allocator_data); + SLJIT_FREE(compiler, allocator_data); + return NULL; + } + compiler->cpool_unique = (sljit_ub*)(compiler->cpool + CPOOL_SIZE); + compiler->cpool_diff = 0xffffffff; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + compiler->delay_slot = UNMOVABLE_INS; +#endif + +#if (defined SLJIT_NEEDS_COMPILER_INIT && SLJIT_NEEDS_COMPILER_INIT) + if (!compiler_initialized) { + init_compiler(); + compiler_initialized = 1; + } +#endif + + return compiler; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *curr; + void *allocator_data = compiler->allocator_data; + SLJIT_UNUSED_ARG(allocator_data); + + buf = compiler->buf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + + buf = compiler->abuf; + while (buf) { + curr = buf; + buf = buf->next; + SLJIT_FREE(curr, allocator_data); + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_FREE(compiler->cpool, allocator_data); +#endif + SLJIT_FREE(compiler, allocator_data); +} + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) +{ + /* Remove thumb mode flag. */ + SLJIT_FREE_EXEC((void*)((sljit_uw)code & ~0x1)); +} +#elif (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) +{ + /* Resolve indirection. */ + code = (void*)(*(sljit_uw*)code); + SLJIT_FREE_EXEC(code); +} +#else +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) +{ + SLJIT_FREE_EXEC(code); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + if (SLJIT_LIKELY(!!jump) && SLJIT_LIKELY(!!label)) { + jump->flags &= ~JUMP_ADDR; + jump->flags |= JUMP_LABEL; + jump->u.label = label; + } +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + if (SLJIT_LIKELY(!!jump)) { + jump->flags &= ~JUMP_LABEL; + jump->flags |= JUMP_ADDR; + jump->u.target = target; + } +} + +/* --------------------------------------------------------------------- */ +/* Private functions */ +/* --------------------------------------------------------------------- */ + +static void* ensure_buf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_ub *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->buf->used_size + size <= (BUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->buf->memory + compiler->buf->used_size; + compiler->buf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->buf; + compiler->buf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +static void* ensure_abuf(struct sljit_compiler *compiler, sljit_uw size) +{ + sljit_ub *ret; + struct sljit_memory_fragment *new_frag; + + SLJIT_ASSERT(size <= 256); + if (compiler->abuf->used_size + size <= (ABUF_SIZE - (sljit_uw)SLJIT_OFFSETOF(struct sljit_memory_fragment, memory))) { + ret = compiler->abuf->memory + compiler->abuf->used_size; + compiler->abuf->used_size += size; + return ret; + } + new_frag = (struct sljit_memory_fragment*)SLJIT_MALLOC(ABUF_SIZE, compiler->allocator_data); + PTR_FAIL_IF_NULL(new_frag); + new_frag->next = compiler->abuf; + compiler->abuf = new_frag; + new_frag->used_size = size; + return new_frag->memory; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) +{ + CHECK_ERROR_PTR(); + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (size <= 0 || size > 128) + return NULL; + size = (size + 7) & ~7; +#else + if (size <= 0 || size > 64) + return NULL; + size = (size + 3) & ~3; +#endif + return ensure_abuf(compiler, size); +} + +static SLJIT_INLINE void reverse_buf(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf = compiler->buf; + struct sljit_memory_fragment *prev = NULL; + struct sljit_memory_fragment *tmp; + + do { + tmp = buf->next; + buf->next = prev; + prev = buf; + buf = tmp; + } while (buf != NULL); + + compiler->buf = prev; +} + +static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(local_size); + + compiler->options = options; + compiler->scratches = scratches; + compiler->saveds = saveds; + compiler->fscratches = fscratches; + compiler->fsaveds = fsaveds; +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->logical_local_size = local_size; +#endif +} + +static SLJIT_INLINE void set_label(struct sljit_label *label, struct sljit_compiler *compiler) +{ + label->next = NULL; + label->size = compiler->size; + if (compiler->last_label) + compiler->last_label->next = label; + else + compiler->labels = label; + compiler->last_label = label; +} + +static SLJIT_INLINE void set_jump(struct sljit_jump *jump, struct sljit_compiler *compiler, sljit_si flags) +{ + jump->next = NULL; + jump->flags = flags; + if (compiler->last_jump) + compiler->last_jump->next = jump; + else + compiler->jumps = jump; + compiler->last_jump = jump; +} + +static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_compiler *compiler) +{ + const_->next = NULL; + const_->addr = compiler->size; + if (compiler->last_const) + compiler->last_const->next = const_; + else + compiler->consts = const_; + compiler->last_const = const_; +} + +#define ADDRESSING_DEPENDS_ON(exp, reg) \ + (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) +#define FUNCTION_CHECK_OP() \ + CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + switch (GET_OPCODE(op)) { \ + case SLJIT_NOT: \ + case SLJIT_CLZ: \ + case SLJIT_AND: \ + case SLJIT_OR: \ + case SLJIT_XOR: \ + case SLJIT_SHL: \ + case SLJIT_LSHR: \ + case SLJIT_ASHR: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \ + break; \ + case SLJIT_NEG: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ + break; \ + case SLJIT_MUL: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ + break; \ + case SLJIT_ADD: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \ + break; \ + case SLJIT_SUB: \ + break; \ + case SLJIT_ADDC: \ + case SLJIT_SUBC: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \ + break; \ + case SLJIT_BREAKPOINT: \ + case SLJIT_NOP: \ + case SLJIT_LUMUL: \ + case SLJIT_LSMUL: \ + case SLJIT_MOV: \ + case SLJIT_MOV_UI: \ + case SLJIT_MOV_P: \ + case SLJIT_MOVU: \ + case SLJIT_MOVU_UI: \ + case SLJIT_MOVU_P: \ + /* Nothing allowed */ \ + CHECK_ARGUMENT(!(op & (SLJIT_INT_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + default: \ + /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + } + +#define FUNCTION_CHECK_FOP() \ + CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ + switch (GET_OPCODE(op)) { \ + case SLJIT_DCMP: \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ + break; \ + default: \ + /* Only SLJIT_INT_OP or SLJIT_SINGLE_OP is allowed. */ \ + CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ + break; \ + } + +#define FUNCTION_CHECK_IS_REG(r) \ + (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#define FUNCTION_CHECK_IS_REG_OR_UNUSED(r) \ + ((r) == SLJIT_UNUSED || \ + ((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ + ((r) > (SLJIT_S0 - compiler->saveds) && (r) <= SLJIT_S0)) + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define CHECK_NOT_VIRTUAL_REGISTER(p) \ + CHECK_ARGUMENT((p) < SLJIT_R3 || (p) > SLJIT_R6); +#else +#define CHECK_NOT_VIRTUAL_REGISTER(p) +#endif + +#define FUNCTION_CHECK_SRC(p, i) \ + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \ + if (FUNCTION_CHECK_IS_REG(p)) \ + CHECK_ARGUMENT((i) == 0); \ + else if ((p) == SLJIT_IMM) \ + ; \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(!((i) & ~0x3)); \ + } \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } + +#define FUNCTION_CHECK_DST(p, i) \ + CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \ + if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \ + CHECK_ARGUMENT((i) == 0); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(!((i) & ~0x3)); \ + } \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } + +#define FUNCTION_FCHECK(p, i) \ + CHECK_ARGUMENT(compiler->fscratches != -1 && compiler->fsaveds != -1); \ + if (((p) >= SLJIT_FR0 && (p) < (SLJIT_FR0 + compiler->fscratches)) || \ + ((p) > (SLJIT_FS0 - compiler->fsaveds) && (p) <= SLJIT_FS0)) \ + CHECK_ARGUMENT(i == 0); \ + else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ + CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ + else { \ + CHECK_ARGUMENT((p) & SLJIT_MEM); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG_OR_UNUSED((p) & REG_MASK)); \ + CHECK_NOT_VIRTUAL_REGISTER((p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + CHECK_ARGUMENT(((p) & REG_MASK) != SLJIT_UNUSED); \ + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(OFFS_REG(p))); \ + CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ + CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \ + } \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + } + +#define FUNCTION_CHECK_OP1() \ + if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ + CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \ + CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \ + if ((src & SLJIT_MEM) && (src & REG_MASK)) \ + CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ + } + +#endif /* SLJIT_ARGUMENT_CHECKS */ + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + compiler->verbose = verbose; +} + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#ifdef _WIN64 +# define SLJIT_PRINT_D "I64" +#else +# define SLJIT_PRINT_D "l" +#endif +#else +# define SLJIT_PRINT_D "" +#endif + +#define sljit_verbose_reg(compiler, r) \ + do { \ + if ((r) < (SLJIT_R0 + compiler->scratches)) \ + fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \ + else \ + fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \ + } while (0) + +#define sljit_verbose_param(compiler, p, i) \ + if ((p) & SLJIT_IMM) \ + fprintf(compiler->verbose, "#%" SLJIT_PRINT_D "d", (i)); \ + else if ((p) & SLJIT_MEM) { \ + if ((p) & REG_MASK) { \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, " * %d", 1 << (i)); \ + } \ + else if (i) \ + fprintf(compiler->verbose, " + %" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ + } \ + else \ + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ + } else if (p) \ + sljit_verbose_reg(compiler, p); \ + else \ + fprintf(compiler->verbose, "unused"); + +#define sljit_verbose_fparam(compiler, p, i) \ + if ((p) & SLJIT_MEM) { \ + if ((p) & REG_MASK) { \ + fputc('[', compiler->verbose); \ + sljit_verbose_reg(compiler, (p) & REG_MASK); \ + if ((p) & OFFS_REG_MASK) { \ + fprintf(compiler->verbose, " + "); \ + sljit_verbose_reg(compiler, OFFS_REG(p)); \ + if (i) \ + fprintf(compiler->verbose, "%d", 1 << (i)); \ + } \ + else if (i) \ + fprintf(compiler->verbose, "%" SLJIT_PRINT_D "d", (i)); \ + fputc(']', compiler->verbose); \ + } \ + else \ + fprintf(compiler->verbose, "[#%" SLJIT_PRINT_D "d]", (i)); \ + } \ + else { \ + if ((p) < (SLJIT_FR0 + compiler->fscratches)) \ + fprintf(compiler->verbose, "fr%d", (p) - SLJIT_FR0); \ + else \ + fprintf(compiler->verbose, "fs%d", SLJIT_NUMBER_OF_FLOAT_REGISTERS - (p)); \ + } + +static SLJIT_CONST char* op0_names[] = { + (char*)"breakpoint", (char*)"nop", + (char*)"lumul", (char*)"lsmul", (char*)"ludiv", (char*)"lsdiv", +}; + +static SLJIT_CONST char* op1_names[] = { + (char*)"mov", (char*)"mov_ub", (char*)"mov_sb", (char*)"mov_uh", + (char*)"mov_sh", (char*)"mov_ui", (char*)"mov_si", (char*)"mov_p", + (char*)"movu", (char*)"movu_ub", (char*)"movu_sb", (char*)"movu_uh", + (char*)"movu_sh", (char*)"movu_ui", (char*)"movu_si", (char*)"movu_p", + (char*)"not", (char*)"neg", (char*)"clz", +}; + +static SLJIT_CONST char* op2_names[] = { + (char*)"add", (char*)"addc", (char*)"sub", (char*)"subc", + (char*)"mul", (char*)"and", (char*)"or", (char*)"xor", + (char*)"shl", (char*)"lshr", (char*)"ashr", +}; + +static SLJIT_CONST char* fop1_names[] = { + (char*)"mov", (char*)"conv", (char*)"conv", (char*)"conv", + (char*)"conv", (char*)"conv", (char*)"cmp", (char*)"neg", + (char*)"abs", +}; + +static SLJIT_CONST char* fop2_names[] = { + (char*)"add", (char*)"sub", (char*)"mul", (char*)"div" +}; + +#define JUMP_PREFIX(type) \ + ((type & 0xff) <= SLJIT_MUL_NOT_OVERFLOW ? ((type & SLJIT_INT_OP) ? "i_" : "") \ + : ((type & 0xff) <= SLJIT_D_ORDERED ? ((type & SLJIT_SINGLE_OP) ? "s_" : "d_") : "")) + +static char* jump_names[] = { + (char*)"equal", (char*)"not_equal", + (char*)"less", (char*)"greater_equal", + (char*)"greater", (char*)"less_equal", + (char*)"sig_less", (char*)"sig_greater_equal", + (char*)"sig_greater", (char*)"sig_less_equal", + (char*)"overflow", (char*)"not_overflow", + (char*)"mul_overflow", (char*)"mul_not_overflow", + (char*)"equal", (char*)"not_equal", + (char*)"less", (char*)"greater_equal", + (char*)"greater", (char*)"less_equal", + (char*)"unordered", (char*)"ordered", + (char*)"jump", (char*)"fast_call", + (char*)"call0", (char*)"call1", (char*)"call2", (char*)"call3" +}; + +#endif /* SLJIT_VERBOSE */ + +/* --------------------------------------------------------------------- */ +/* Arch dependent */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_generate_code(struct sljit_compiler *compiler) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + struct sljit_jump *jump; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->size > 0); + jump = compiler->jumps; + while (jump) { + /* All jumps have target. */ + CHECK_ARGUMENT(jump->flags & (JUMP_LABEL | JUMP_ADDR)); + jump = jump->next; + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(args >= 0 && args <= 3); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(args <= saveds); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " enter options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(args >= 0 && args <= 3); + CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS); + CHECK_ARGUMENT(args <= saveds); + CHECK_ARGUMENT(fscratches >= 0 && fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); + CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " set_context options:none args:%d scratches:%d saveds:%d fscratches:%d fsaveds:%d local_size:%d\n", + args, scratches, saveds, fscratches, fsaveds, local_size); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(compiler->scratches >= 0); + if (op != SLJIT_UNUSED) { + CHECK_ARGUMENT(op >= SLJIT_MOV && op <= SLJIT_MOV_P); + FUNCTION_CHECK_SRC(src, srcw); + } + else + CHECK_ARGUMENT(src == 0 && srcw == 0); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (op == SLJIT_UNUSED) + fprintf(compiler->verbose, " return\n"); + else { + fprintf(compiler->verbose, " return.%s ", op1_names[op - SLJIT_OP1_BASE]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_enter "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fast_return "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL) + || ((op & ~SLJIT_INT_OP) >= SLJIT_LUDIV && (op & ~SLJIT_INT_OP) <= SLJIT_LSDIV)); + CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? "" : "i", op0_names[GET_OPCODE(op) - SLJIT_OP0_BASE]); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); + FUNCTION_CHECK_OP(); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_OP1(); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", + !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); + FUNCTION_CHECK_OP(); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", + !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_si reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_float_register_index(sljit_si reg) +{ + SLJIT_UNUSED_ARG(reg); +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + int i; +#endif + + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(instruction); +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + CHECK_ARGUMENT(size > 0 && size < 16); +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + CHECK_ARGUMENT((size == 2 && (((sljit_sw)instruction) & 0x1) == 0) + || (size == 4 && (((sljit_sw)instruction) & 0x3) == 0)); +#else + CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); +#endif + +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " op_custom"); + for (i = 0; i < size; i++) + fprintf(compiler->verbose, " 0x%x", ((sljit_ub*)instruction)[i]); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DMOV && GET_OPCODE(op) <= SLJIT_DABS); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + fprintf(compiler->verbose, " %s%s ", fop1_names[SLJIT_CONVD_FROMS - SLJIT_FOP1_BASE], + (op & SLJIT_SINGLE_OP) ? "s.fromd" : "d.froms"); + else + fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", + fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE]); + + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_DCMP); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s%s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop1_names[SLJIT_DCMP - SLJIT_FOP1_BASE], + (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? ".s" : ""); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_CONVI_FROMD); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src, srcw); + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? "i" : "w", + (op & SLJIT_SINGLE_OP) ? "s" : "d"); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONVD_FROMW && GET_OPCODE(op) <= SLJIT_CONVD_FROMI); + FUNCTION_CHECK_FOP(); + FUNCTION_CHECK_SRC(src, srcw); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s.from%s ", fop1_names[GET_OPCODE(op) - SLJIT_FOP1_BASE], + (op & SLJIT_SINGLE_OP) ? "s" : "d", + (GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? "i" : "w"); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_DADD && GET_OPCODE(op) <= SLJIT_DDIV); + FUNCTION_CHECK_FOP(); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); + FUNCTION_FCHECK(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " %s%s ", (op & SLJIT_SINGLE_OP) ? "s" : "d", fop2_names[GET_OPCODE(op) - SLJIT_FOP2_BASE]); + sljit_verbose_fparam(compiler, dst, dstw); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, "label:\n"); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3); + CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_INT_OP)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) + fprintf(compiler->verbose, " jump%s.%s%s\n", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + JUMP_PREFIX(type), jump_names[type & 0xff]); +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); + FUNCTION_CHECK_SRC(src1, src1w); + FUNCTION_CHECK_SRC(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + (type & SLJIT_INT_OP) ? "i_" : "", jump_names[type & 0xff]); + sljit_verbose_param(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_SINGLE_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_D_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fcmp%s.%s%s ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : ".r", + (type & SLJIT_SINGLE_OP) ? "s_" : "d_", jump_names[type & 0xff]); + sljit_verbose_fparam(compiler, src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(compiler, src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + FUNCTION_CHECK_SRC(src, srcw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " ijump.%s ", jump_names[type]); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED); + CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_UI || GET_OPCODE(op) == SLJIT_MOV_SI + || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); + CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0); + CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); + if (GET_OPCODE(op) < SLJIT_ADD) { + CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0); + } else { + CHECK_ARGUMENT(src == dst && srcw == dstw); + } + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " flags.%s%s%s%s ", !(op & SLJIT_INT_OP) ? "" : "i", + GET_OPCODE(op) >= SLJIT_OP2_BASE ? op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE] : op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], + !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + sljit_verbose_param(compiler, dst, dstw); + if (src != SLJIT_UNUSED) { + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + } + fprintf(compiler->verbose, ", %s%s\n", JUMP_PREFIX(type), jump_names[type & 0xff]); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " local_base "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", offset); + } +#endif + CHECK_RETURN_OK; +} + +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + FUNCTION_CHECK_DST(dst, dstw); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " const "); + sljit_verbose_param(compiler, dst, dstw); + fprintf(compiler->verbose, ", #%" SLJIT_PRINT_D "d\n", init_value); + } +#endif + CHECK_RETURN_OK; +} + +#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */ + +#define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \ + SLJIT_COMPILE_ASSERT(!(SLJIT_CONVW_FROMD & 0x1) && !(SLJIT_CONVD_FROMW & 0x1), \ + invalid_float_opcodes); \ + if (GET_OPCODE(op) >= SLJIT_CONVW_FROMD && GET_OPCODE(op) <= SLJIT_DCMP) { \ + if (GET_OPCODE(op) == SLJIT_DCMP) { \ + CHECK(check_sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_cmp(compiler, op, dst, dstw, src, srcw); \ + } \ + if ((GET_OPCODE(op) | 0x1) == SLJIT_CONVI_FROMD) { \ + CHECK(check_sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convw_fromd(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); \ + return sljit_emit_fop1_convd_fromw(compiler, op, dst, dstw, src, srcw); \ + } \ + CHECK(check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw)); \ + ADJUST_LOCAL_OFFSET(dst, dstw); \ + ADJUST_LOCAL_OFFSET(src, srcw); + +static SLJIT_INLINE sljit_si emit_mov_before_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + /* Return if don't need to do anything. */ + if (op == SLJIT_UNUSED) + return SLJIT_SUCCESS; + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + /* At the moment the pointer size is always equal to sljit_sw. May be changed in the future. */ + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#else + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P)) + return SLJIT_SUCCESS; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + compiler->skip_checks = 1; +#endif + return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); +} + +/* CPU description section */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 32bit (" +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 64bit (" +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_CPUINFO_PART2 "little endian + " +#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#define SLJIT_CPUINFO_PART2 "big endian + " +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) +#define SLJIT_CPUINFO_PART3 "unaligned)" +#else +#define SLJIT_CPUINFO_PART3 "aligned)" +#endif + +#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +# include "sljitNativeX86_common.c" +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +# include "sljitNativeARM_32.c" +#elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) +# include "sljitNativeARM_T2_32.c" +#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) +# include "sljitNativeARM_64.c" +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) +# include "sljitNativePPC_common.c" +#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) +# include "sljitNativeMIPS_common.c" +#elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) +# include "sljitNativeSPARC_common.c" +#elif (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) +# include "sljitNativeTILEGX_64.c" +#endif + +#if !(defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* Default compare for most architectures. */ + sljit_si flags, tmp_src, condition; + sljit_sw tmp_srcw; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + + condition = type & 0xff; +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + if ((condition == SLJIT_EQUAL || condition == SLJIT_NOT_EQUAL)) { + if ((src1 & SLJIT_IMM) && !src1w) { + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + src2w = 0; + } + if ((src2 & SLJIT_IMM) && !src2w) + return emit_cmp_to0(compiler, type, src1, src1w); + } +#endif + + if (SLJIT_UNLIKELY((src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM))) { + /* Immediate is prefered as second argument by most architectures. */ + switch (condition) { + case SLJIT_LESS: + condition = SLJIT_GREATER; + break; + case SLJIT_GREATER_EQUAL: + condition = SLJIT_LESS_EQUAL; + break; + case SLJIT_GREATER: + condition = SLJIT_LESS; + break; + case SLJIT_LESS_EQUAL: + condition = SLJIT_GREATER_EQUAL; + break; + case SLJIT_SIG_LESS: + condition = SLJIT_SIG_GREATER; + break; + case SLJIT_SIG_GREATER_EQUAL: + condition = SLJIT_SIG_LESS_EQUAL; + break; + case SLJIT_SIG_GREATER: + condition = SLJIT_SIG_LESS; + break; + case SLJIT_SIG_LESS_EQUAL: + condition = SLJIT_SIG_GREATER_EQUAL; + break; + } + type = condition | (type & (SLJIT_INT_OP | SLJIT_REWRITABLE_JUMP)); + tmp_src = src1; + src1 = src2; + src2 = tmp_src; + tmp_srcw = src1w; + src1w = src2w; + src2w = tmp_srcw; + } + + if (condition <= SLJIT_NOT_ZERO) + flags = SLJIT_SET_E; + else if (condition <= SLJIT_LESS_EQUAL) + flags = SLJIT_SET_U; + else + flags = SLJIT_SET_S; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + PTR_FAIL_IF(sljit_emit_op2(compiler, SLJIT_SUB | flags | (type & SLJIT_INT_OP), + SLJIT_UNUSED, 0, src1, src1w, src2, src2w)); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si flags, condition; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); + + condition = type & 0xff; + flags = (condition <= SLJIT_D_NOT_EQUAL) ? SLJIT_SET_E : SLJIT_SET_S; + if (type & SLJIT_SINGLE_OP) + flags |= SLJIT_SINGLE_OP; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + sljit_emit_fop1(compiler, SLJIT_DCMP | flags, src1, src1w, src2, src2w); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); +} + +#endif + +#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + if (offset != 0) + return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); +} + +#endif + +#else /* SLJIT_CONFIG_UNSUPPORTED */ + +/* Empty function bodies for those machines, which are not (yet) supported. */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "unsupported"; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) +{ + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(size); + SLJIT_ASSERT_STOP(); + return NULL; +} + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(verbose); + SLJIT_ASSERT_STOP(); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) +{ + SLJIT_UNUSED_ARG(code); + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); + SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(options); + SLJIT_UNUSED_ARG(args); + SLJIT_UNUSED_ARG(scratches); + SLJIT_UNUSED_ARG(saveds); + SLJIT_UNUSED_ARG(fscratches); + SLJIT_UNUSED_ARG(fsaveds); + SLJIT_UNUSED_ARG(local_size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + SLJIT_ASSERT_STOP(); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(instruction); + SLJIT_UNUSED_ARG(size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ + SLJIT_ASSERT_STOP(); + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(label); + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) +{ + SLJIT_UNUSED_ARG(jump); + SLJIT_UNUSED_ARG(target); + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(src); + SLJIT_UNUSED_ARG(srcw); + SLJIT_UNUSED_ARG(type); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(offset); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw initval) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(dst); + SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(initval); + SLJIT_ASSERT_STOP(); + return NULL; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_addr); + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + SLJIT_UNUSED_ARG(addr); + SLJIT_UNUSED_ARG(new_constant); + SLJIT_ASSERT_STOP(); +} + +#endif diff --git a/src/sljit/sljitLir.h b/src/sljit/sljitLir.h new file mode 100644 index 0000000..229e04f --- /dev/null +++ b/src/sljit/sljitLir.h @@ -0,0 +1,1191 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SLJIT_LIR_H_ +#define _SLJIT_LIR_H_ + +/* + ------------------------------------------------------------------------ + Stack-Less JIT compiler for multiple architectures (x86, ARM, PowerPC) + ------------------------------------------------------------------------ + + Short description + Advantages: + - The execution can be continued from any LIR instruction. In other + words, it is possible to jump to any label from anywhere, even from + a code fragment, which is compiled later, if both compiled code + shares the same context. See sljit_emit_enter for more details + - Supports self modifying code: target of (conditional) jump and call + instructions and some constant values can be dynamically modified + during runtime + - although it is not suggested to do it frequently + - can be used for inline caching: save an important value once + in the instruction stream + - since this feature limits the optimization possibilities, a + special flag must be passed at compile time when these + instructions are emitted + - A fixed stack space can be allocated for local variables + - The compiler is thread-safe + - The compiler is highly configurable through preprocessor macros. + You can disable unneeded features (multithreading in single + threaded applications), and you can use your own system functions + (including memory allocators). See sljitConfig.h + Disadvantages: + - No automatic register allocation, and temporary results are + not stored on the stack. (hence the name comes) + In practice: + - This approach is very effective for interpreters + - One of the saved registers typically points to a stack interface + - It can jump to any exception handler anytime (even if it belongs + to another function) + - Hot paths can be modified during runtime reflecting the changes + of the fastest execution path of the dynamic language + - SLJIT supports complex memory addressing modes + - mainly position and context independent code (except some cases) + + For valgrind users: + - pass --smc-check=all argument to valgrind, since JIT is a "self-modifying code" +*/ + +#if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG) +#include "sljitConfig.h" +#endif + +/* The following header file defines useful macros for fine tuning +sljit based code generators. They are listed in the beginning +of sljitConfigInternal.h */ + +#include "sljitConfigInternal.h" + +/* --------------------------------------------------------------------- */ +/* Error codes */ +/* --------------------------------------------------------------------- */ + +/* Indicates no error. */ +#define SLJIT_SUCCESS 0 +/* After the call of sljit_generate_code(), the error code of the compiler + is set to this value to avoid future sljit calls (in debug mode at least). + The complier should be freed after sljit_generate_code(). */ +#define SLJIT_ERR_COMPILED 1 +/* Cannot allocate non executable memory. */ +#define SLJIT_ERR_ALLOC_FAILED 2 +/* Cannot allocate executable memory. + Only for sljit_generate_code() */ +#define SLJIT_ERR_EX_ALLOC_FAILED 3 +/* Return value for SLJIT_CONFIG_UNSUPPORTED placeholder architecture. */ +#define SLJIT_ERR_UNSUPPORTED 4 +/* An ivalid argument is passed to any SLJIT function. */ +#define SLJIT_ERR_BAD_ARGUMENT 5 + +/* --------------------------------------------------------------------- */ +/* Registers */ +/* --------------------------------------------------------------------- */ + +/* + Scratch (R) registers: registers whose may not preserve their values + across function calls. + + Saved (S) registers: registers whose preserve their values across + function calls. + + The scratch and saved register sets are overlap. The last scratch register + is the first saved register, the one before the last is the second saved + register, and so on. + + If an architecture provides two scratch and three saved registers, + its scratch and saved register sets are the following: + + R0 | [S4] | R0 and S4 represent the same physical register + R1 | [S3] | R1 and S3 represent the same physical register + [R2] | S2 | R2 and S2 represent the same physical register + [R3] | S1 | R3 and S1 represent the same physical register + [R4] | S0 | R4 and S0 represent the same physical register + + Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and + SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. + + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers + are virtual on x86-32. See below. + + The purpose of this definition is convenience. Although a register + is either scratch register or saved register, SLJIT allows accessing + them from the other set. For example, four registers can be used as + scratch registers and the fifth one as saved register on the architecture + above. Of course the last two scratch registers (R2 and R3) from this + four will be saved on the stack, because they are defined as saved + registers in the application binary interface. Still R2 and R3 can be + used for referencing to these registers instead of S2 and S1, which + makes easier to write platform independent code. Scratch registers + can be saved registers in a similar way, but these extra saved + registers will not be preserved across function calls! Hence the + application must save them on those platforms, where the number of + saved registers is too low. This can be done by copy them onto + the stack and restore them after a function call. + + Note: To emphasize that registers assigned to R2-R4 are saved + registers, they are enclosed by square brackets. S3-S4 + are marked in a similar way. + + Note: sljit_emit_enter and sljit_set_context defines whether a register + is S or R register. E.g: when 3 scratches and 1 saved is mapped + by sljit_emit_enter, the allowed register set will be: R0-R2 and + S0. Although S2 is mapped to the same position as R2, it does not + available in the current configuration. Furthermore the R3 (S1) + register does not available as well. +*/ + +/* When SLJIT_UNUSED is specified as destination, the result is discarded. */ +#define SLJIT_UNUSED 0 + +/* Scratch registers. */ +#define SLJIT_R0 1 +#define SLJIT_R1 2 +#define SLJIT_R2 3 +/* Note: on x86-32, R3 - R6 (same as S3 - S6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_R3 4 +#define SLJIT_R4 5 +#define SLJIT_R5 6 +#define SLJIT_R6 7 +#define SLJIT_R7 8 +#define SLJIT_R8 9 +#define SLJIT_R9 10 +/* All R registers provided by the architecture can be accessed by SLJIT_R(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_REGISTERS. */ +#define SLJIT_R(i) (1 + (i)) + +/* Saved registers. */ +#define SLJIT_S0 (SLJIT_NUMBER_OF_REGISTERS) +#define SLJIT_S1 (SLJIT_NUMBER_OF_REGISTERS - 1) +#define SLJIT_S2 (SLJIT_NUMBER_OF_REGISTERS - 2) +/* Note: on x86-32, S3 - S6 (same as R3 - R6) are emulated (they + are allocated on the stack). These registers are called virtual + and cannot be used for memory addressing (cannot be part of + any SLJIT_MEM1, SLJIT_MEM2 construct). There is no such + limitation on other CPUs. See sljit_get_register_index(). */ +#define SLJIT_S3 (SLJIT_NUMBER_OF_REGISTERS - 3) +#define SLJIT_S4 (SLJIT_NUMBER_OF_REGISTERS - 4) +#define SLJIT_S5 (SLJIT_NUMBER_OF_REGISTERS - 5) +#define SLJIT_S6 (SLJIT_NUMBER_OF_REGISTERS - 6) +#define SLJIT_S7 (SLJIT_NUMBER_OF_REGISTERS - 7) +#define SLJIT_S8 (SLJIT_NUMBER_OF_REGISTERS - 8) +#define SLJIT_S9 (SLJIT_NUMBER_OF_REGISTERS - 9) +/* All S registers provided by the architecture can be accessed by SLJIT_S(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_REGISTERS. */ +#define SLJIT_S(i) (SLJIT_NUMBER_OF_REGISTERS - (i)) + +/* Registers >= SLJIT_FIRST_SAVED_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_REG (SLJIT_S0 - SLJIT_NUMBER_OF_SAVED_REGISTERS + 1) + +/* The SLJIT_SP provides direct access to the linear stack space allocated by + sljit_emit_enter. It can only be used in the following form: SLJIT_MEM1(SLJIT_SP). + The immediate offset is extended by the relative stack offset automatically. + The sljit_get_local_base can be used to obtain the absolute offset. */ +#define SLJIT_SP (SLJIT_NUMBER_OF_REGISTERS + 1) + +/* Return with machine word. */ + +#define SLJIT_RETURN_REG SLJIT_R0 + +/* x86 prefers specific registers for special purposes. In case of shift + by register it supports only SLJIT_R2 for shift argument + (which is the src2 argument of sljit_emit_op2). If another register is + used, sljit must exchange data between registers which cause a minor + slowdown. Other architectures has no such limitation. */ + +#define SLJIT_PREF_SHIFT_REG SLJIT_R2 + +/* --------------------------------------------------------------------- */ +/* Floating point registers */ +/* --------------------------------------------------------------------- */ + +/* Each floating point register can store a double or single precision + value. The FR and FS register sets are overlap in the same way as R + and S register sets. See above. */ + +/* Note: SLJIT_UNUSED as destination is not valid for floating point + operations, since they cannot be used for setting flags. */ + +/* Floating point scratch registers. */ +#define SLJIT_FR0 1 +#define SLJIT_FR1 2 +#define SLJIT_FR2 3 +#define SLJIT_FR3 4 +#define SLJIT_FR4 5 +#define SLJIT_FR5 6 +/* All FR registers provided by the architecture can be accessed by SLJIT_FR(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_FLOAT_REGISTERS. */ +#define SLJIT_FR(i) (1 + (i)) + +/* Floating point saved registers. */ +#define SLJIT_FS0 (SLJIT_NUMBER_OF_FLOAT_REGISTERS) +#define SLJIT_FS1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 1) +#define SLJIT_FS2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 2) +#define SLJIT_FS3 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 3) +#define SLJIT_FS4 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 4) +#define SLJIT_FS5 (SLJIT_NUMBER_OF_FLOAT_REGISTERS - 5) +/* All S registers provided by the architecture can be accessed by SLJIT_FS(i) + The i parameter must be >= 0 and < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS. */ +#define SLJIT_FS(i) (SLJIT_NUMBER_OF_FLOAT_REGISTERS - (i)) + +/* Float registers >= SLJIT_FIRST_SAVED_FLOAT_REG are saved registers. */ +#define SLJIT_FIRST_SAVED_FLOAT_REG (SLJIT_FS0 - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS + 1) + +/* --------------------------------------------------------------------- */ +/* Main structures and functions */ +/* --------------------------------------------------------------------- */ + +/* + The following structures are private, and can be changed in the + future. Keeping them here allows code inlining. +*/ + +struct sljit_memory_fragment { + struct sljit_memory_fragment *next; + sljit_uw used_size; + /* Must be aligned to sljit_sw. */ + sljit_ub memory[1]; +}; + +struct sljit_label { + struct sljit_label *next; + sljit_uw addr; + /* The maximum size difference. */ + sljit_uw size; +}; + +struct sljit_jump { + struct sljit_jump *next; + sljit_uw addr; + sljit_sw flags; + union { + sljit_uw target; + struct sljit_label* label; + } u; +}; + +struct sljit_const { + struct sljit_const *next; + sljit_uw addr; +}; + +struct sljit_compiler { + sljit_si error; + sljit_si options; + + struct sljit_label *labels; + struct sljit_jump *jumps; + struct sljit_const *consts; + struct sljit_label *last_label; + struct sljit_jump *last_jump; + struct sljit_const *last_const; + + void *allocator_data; + struct sljit_memory_fragment *buf; + struct sljit_memory_fragment *abuf; + + /* Used scratch registers. */ + sljit_si scratches; + /* Used saved registers. */ + sljit_si saveds; + /* Used float scratch registers. */ + sljit_si fscratches; + /* Used float saved registers. */ + sljit_si fsaveds; + /* Local stack size. */ + sljit_si local_size; + /* Code size. */ + sljit_uw size; + /* For statistical purposes. */ + sljit_uw executable_size; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_si args; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_si mode32; +#endif + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + sljit_si flags_saved; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Constant pool handling. */ + sljit_uw *cpool; + sljit_ub *cpool_unique; + sljit_uw cpool_diff; + sljit_uw cpool_fill; + /* Other members. */ + /* Contains pointer, "ldr pc, [...]" pairs. */ + sljit_uw patches; +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + /* Temporary fields. */ + sljit_uw shift_imm; + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + sljit_sw imm; + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) + sljit_si delay_slot; + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + sljit_si delay_slot; + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_CONFIG_TILEGX && SLJIT_CONFIG_TILEGX) + sljit_si cache_arg; + sljit_sw cache_argw; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + FILE* verbose; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Local size passed to the functions. */ + sljit_si logical_local_size; +#endif + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ + || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ + || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + sljit_si skip_checks; +#endif +}; + +/* --------------------------------------------------------------------- */ +/* Main functions */ +/* --------------------------------------------------------------------- */ + +/* Creates an sljit compiler. The allocator_data is required by some + custom memory managers. This pointer is passed to SLJIT_MALLOC + and SLJIT_FREE macros. Most allocators (including the default + one) ignores this value, and it is recommended to pass NULL + as a dummy value for allocator_data. + + Returns NULL if failed. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data); + +/* Frees everything except the compiled machine code. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler); + +/* Returns the current error code. If an error is occurred, future sljit + calls which uses the same compiler argument returns early with the same + error code. Thus there is no need for checking the error after every + call, it is enough to do it before the code is compiled. Removing + these checks increases the performance of the compiling process. */ +static SLJIT_INLINE sljit_si sljit_get_compiler_error(struct sljit_compiler *compiler) { return compiler->error; } + +/* + Allocate a small amount of memory. The size must be <= 64 bytes on 32 bit, + and <= 128 bytes on 64 bit architectures. The memory area is owned by the + compiler, and freed by sljit_free_compiler. The returned pointer is + sizeof(sljit_sw) aligned. Excellent for allocating small blocks during + the compiling, and no need to worry about freeing them. The size is + enough to contain at most 16 pointers. If the size is outside of the range, + the function will return with NULL. However, this return value does not + indicate that there is no more memory (does not set the current error code + of the compiler to out-of-memory status). +*/ +SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_si size); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) +/* Passing NULL disables verbose. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code); + +/* + After the machine code generation is finished we can retrieve the allocated + executable memory size, although this area may not be fully filled with + instructions depending on some optimizations. This function is useful only + for statistical purposes. + + Before a successful code generation, this function returns with 0. +*/ +static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } + +/* Instruction generation. Returns with any error code. If there is no + error, they return with SLJIT_SUCCESS. */ + +/* + The executable code is a function call from the viewpoint of the C + language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of + all machine registers and stack handling among other things. The + sljit_emit_enter function emits the necessary instructions for + setting up a new context for the executable code and moves function + arguments to the saved registers. Furthermore the options argument + can be used to pass configuration options to the compiler. The + available options are listed before sljit_emit_enter. + + The number of sljit_sw arguments passed to the generated function + are specified in the "args" parameter. The number of arguments must + be less than or equal to 3. The first argument goes to SLJIT_S0, + the second goes to SLJIT_S1 and so on. The register set used by + the function must be declared as well. The number of scratch and + saved registers used by the function must be passed to sljit_emit_enter. + Only R registers between R0 and "scratches" argument can be used + later. E.g. if "scratches" is set to 2, the register set will be + limited to R0 and R1. The S registers and the floating point + registers ("fscratches" and "fsaveds") are specified in a similar + way. The sljit_emit_enter is also capable of allocating a stack + space for local variables. The "local_size" argument contains the + size in bytes of this local area and its staring address is stored + in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and + SLJIT_SP + local_size (exclusive) can be modified freely until + the function returns. The stack space is not initialized. + + Note: the following conditions must met: + 0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS + 0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS + scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS + 0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + 0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS + + Note: every call of sljit_emit_enter and sljit_set_context + overwrites the previous context. +*/ + +/* The absolute address returned by sljit_get_local_base with +offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */ +#define SLJIT_DOUBLE_ALIGNMENT 0x00000001 + +/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ +#define SLJIT_MAX_LOCAL_SIZE 65536 + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); + +/* The machine code has a context (which contains the local stack space size, + number of used registers, etc.) which initialized by sljit_emit_enter. Several + functions (like sljit_emit_return) requres this context to be able to generate + the appropriate code. However, some code fragments (like inline cache) may have + no normal entry point so their context is unknown for the compiler. Their context + can be provided to the compiler by the sljit_set_context function. + + Note: every call of sljit_emit_enter and sljit_set_context overwrites + the previous context. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size); + +/* Return from machine code. The op argument can be SLJIT_UNUSED which means the + function does not return with anything or any opcode between SLJIT_MOV and + SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op + is SLJIT_UNUSED, otherwise see below the description about source and + destination arguments. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, + sljit_si src, sljit_sw srcw); + +/* Fast calling mechanism for utility functions (see SLJIT_FAST_CALL). All registers and + even the stack frame is passed to the callee. The return address is preserved in + dst/dstw by sljit_emit_fast_enter (the type of the value stored by this function + is sljit_p), and sljit_emit_fast_return can use this as a return value later. */ + +/* Note: only for sljit specific, non ABI compilant calls. Fast, since only a few machine + instructions are needed. Excellent for small uility functions, where saving registers + and setting up a new stack frame would cost too much performance. However, it is still + possible to return to the address of the caller (or anywhere else). */ + +/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */ + +/* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, + since many architectures do clever branch prediction on call / return instruction pairs. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw); +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw); + +/* + Source and destination values for arithmetical instructions + imm - a simple immediate value (cannot be used as a destination) + reg - any of the registers (immediate argument must be 0) + [imm] - absolute immediate memory address + [reg+imm] - indirect memory address + [reg+(reg<=0 ) of any SLJIT_R, + SLJIT_S and SLJIT_SP registers. + + Note: it returns with -1 for virtual registers (only on x86-32). */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg); + +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_FLOAT register. + + Note: the index is always an even number on ARM (except ARM-64), MIPS, and SPARC. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size); + +/* Returns with non-zero if fpu is available. */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void); + +/* Starting index of opcodes for sljit_emit_fop1. */ +#define SLJIT_FOP1_BASE 128 + +/* Flags: SP - (never set any flags) */ +#define SLJIT_DMOV (SLJIT_FOP1_BASE + 0) +#define SLJIT_SMOV (SLJIT_DMOV | SLJIT_SINGLE_OP) +/* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] + SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int + Rounding mode when the destination is W or I: round towards zero. */ +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMS (SLJIT_FOP1_BASE + 1) +#define SLJIT_CONVS_FROMD (SLJIT_CONVD_FROMS | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVW_FROMD (SLJIT_FOP1_BASE + 2) +#define SLJIT_CONVW_FROMS (SLJIT_CONVW_FROMD | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVI_FROMD (SLJIT_FOP1_BASE + 3) +#define SLJIT_CONVI_FROMS (SLJIT_CONVI_FROMD | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMW (SLJIT_FOP1_BASE + 4) +#define SLJIT_CONVS_FROMW (SLJIT_CONVD_FROMW | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_CONVD_FROMI (SLJIT_FOP1_BASE + 5) +#define SLJIT_CONVS_FROMI (SLJIT_CONVD_FROMI | SLJIT_SINGLE_OP) +/* Note: dst is the left and src is the right operand for SLJIT_CMPD. + Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag + is set, the comparison result is unpredictable. + Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ +#define SLJIT_DCMP (SLJIT_FOP1_BASE + 6) +#define SLJIT_SCMP (SLJIT_DCMP | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DNEG (SLJIT_FOP1_BASE + 7) +#define SLJIT_SNEG (SLJIT_DNEG | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DABS (SLJIT_FOP1_BASE + 8) +#define SLJIT_SABS (SLJIT_DABS | SLJIT_SINGLE_OP) + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw); + +/* Starting index of opcodes for sljit_emit_fop2. */ +#define SLJIT_FOP2_BASE 160 + +/* Flags: SP - (never set any flags) */ +#define SLJIT_DADD (SLJIT_FOP2_BASE + 0) +#define SLJIT_SADD (SLJIT_DADD | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DSUB (SLJIT_FOP2_BASE + 1) +#define SLJIT_SSUB (SLJIT_DSUB | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DMUL (SLJIT_FOP2_BASE + 2) +#define SLJIT_SMUL (SLJIT_DMUL | SLJIT_SINGLE_OP) +/* Flags: SP - (never set any flags) */ +#define SLJIT_DDIV (SLJIT_FOP2_BASE + 3) +#define SLJIT_SDIV (SLJIT_DDIV | SLJIT_SINGLE_OP) + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +/* Label and jump instructions. */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler); + +/* Invert (negate) conditional type: xor (^) with 0x1 */ + +/* Integer comparison types. */ +#define SLJIT_EQUAL 0 +#define SLJIT_I_EQUAL (SLJIT_EQUAL | SLJIT_INT_OP) +#define SLJIT_ZERO 0 +#define SLJIT_I_ZERO (SLJIT_ZERO | SLJIT_INT_OP) +#define SLJIT_NOT_EQUAL 1 +#define SLJIT_I_NOT_EQUAL (SLJIT_NOT_EQUAL | SLJIT_INT_OP) +#define SLJIT_NOT_ZERO 1 +#define SLJIT_I_NOT_ZERO (SLJIT_NOT_ZERO | SLJIT_INT_OP) + +#define SLJIT_LESS 2 +#define SLJIT_I_LESS (SLJIT_LESS | SLJIT_INT_OP) +#define SLJIT_GREATER_EQUAL 3 +#define SLJIT_I_GREATER_EQUAL (SLJIT_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_GREATER 4 +#define SLJIT_I_GREATER (SLJIT_GREATER | SLJIT_INT_OP) +#define SLJIT_LESS_EQUAL 5 +#define SLJIT_I_LESS_EQUAL (SLJIT_LESS_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_LESS 6 +#define SLJIT_I_SIG_LESS (SLJIT_SIG_LESS | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER_EQUAL 7 +#define SLJIT_I_SIG_GREATER_EQUAL (SLJIT_SIG_GREATER_EQUAL | SLJIT_INT_OP) +#define SLJIT_SIG_GREATER 8 +#define SLJIT_I_SIG_GREATER (SLJIT_SIG_GREATER | SLJIT_INT_OP) +#define SLJIT_SIG_LESS_EQUAL 9 +#define SLJIT_I_SIG_LESS_EQUAL (SLJIT_SIG_LESS_EQUAL | SLJIT_INT_OP) + +#define SLJIT_OVERFLOW 10 +#define SLJIT_I_OVERFLOW (SLJIT_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_NOT_OVERFLOW 11 +#define SLJIT_I_NOT_OVERFLOW (SLJIT_NOT_OVERFLOW | SLJIT_INT_OP) + +#define SLJIT_MUL_OVERFLOW 12 +#define SLJIT_I_MUL_OVERFLOW (SLJIT_MUL_OVERFLOW | SLJIT_INT_OP) +#define SLJIT_MUL_NOT_OVERFLOW 13 +#define SLJIT_I_MUL_NOT_OVERFLOW (SLJIT_MUL_NOT_OVERFLOW | SLJIT_INT_OP) + +/* Floating point comparison types. */ +#define SLJIT_D_EQUAL 14 +#define SLJIT_S_EQUAL (SLJIT_D_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_NOT_EQUAL 15 +#define SLJIT_S_NOT_EQUAL (SLJIT_D_NOT_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_LESS 16 +#define SLJIT_S_LESS (SLJIT_D_LESS | SLJIT_SINGLE_OP) +#define SLJIT_D_GREATER_EQUAL 17 +#define SLJIT_S_GREATER_EQUAL (SLJIT_D_GREATER_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_GREATER 18 +#define SLJIT_S_GREATER (SLJIT_D_GREATER | SLJIT_SINGLE_OP) +#define SLJIT_D_LESS_EQUAL 19 +#define SLJIT_S_LESS_EQUAL (SLJIT_D_LESS_EQUAL | SLJIT_SINGLE_OP) +#define SLJIT_D_UNORDERED 20 +#define SLJIT_S_UNORDERED (SLJIT_D_UNORDERED | SLJIT_SINGLE_OP) +#define SLJIT_D_ORDERED 21 +#define SLJIT_S_ORDERED (SLJIT_D_ORDERED | SLJIT_SINGLE_OP) + +/* Unconditional jump types. */ +#define SLJIT_JUMP 22 +#define SLJIT_FAST_CALL 23 +#define SLJIT_CALL0 24 +#define SLJIT_CALL1 25 +#define SLJIT_CALL2 26 +#define SLJIT_CALL3 27 + +/* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */ + +/* The target can be changed during runtime (see: sljit_set_jump_addr). */ +#define SLJIT_REWRITABLE_JUMP 0x1000 + +/* Emit a jump instruction. The destination is not set, only the type of the jump. + type must be between SLJIT_EQUAL and SLJIT_CALL3 + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: - (never set any flags) for both conditional and unconditional jumps. + Flags: destroy all flags for calls. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type); + +/* Basic arithmetic comparison. In most architectures it is implemented as + an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting + appropriate flags) followed by a sljit_emit_jump. However some + architectures (i.e: ARM64 or MIPS) may employ special optimizations here. + It is suggested to use this comparison form when appropriate. + type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: destroy flags. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +/* Basic floating point comparison. In most architectures it is implemented as + an SLJIT_FCMP operation (setting appropriate flags) followed by a + sljit_emit_jump. However some architectures (i.e: MIPS) may employ + special optimizations here. It is suggested to use this comparison form + when appropriate. + type must be between SLJIT_D_EQUAL and SLJIT_S_ORDERED + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: destroy flags. + Note: if either operand is NaN, the behaviour is undefined for + types up to SLJIT_S_LESS_EQUAL. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +/* Set the destination of the jump to this label. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); +/* Set the destination address of the jump to this label. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target); + +/* Call function or jump anywhere. Both direct and indirect form + type must be between SLJIT_JUMP and SLJIT_CALL3 + Direct form: set src to SLJIT_IMM() and srcw to the address + Indirect form: any other valid addressing mode + Flags: - (never set any flags) for unconditional jumps. + Flags: destroy all flags for calls. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw); + +/* Perform the operation using the conditional flags as the second argument. + Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value + represented by the type is 1, if the condition represented by the type + is fulfilled, and 0 otherwise. + + If op == SLJIT_MOV, SLJIT_MOV_SI, SLJIT_MOV_UI: + Set dst to the value represented by the type (0 or 1). + Src must be SLJIT_UNUSED, and srcw must be 0 + Flags: - (never set any flags) + If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR + Performs the binary operation using src as the first, and the value + represented by type as the second argument. + Important note: only dst=src and dstw=srcw is supported at the moment! + Flags: I | E | K + Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */ +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type); + +/* Copies the base address of SLJIT_SP + offset to dst. + Flags: - (never set any flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset); + +/* The constant can be changed runtime (see: sljit_set_const) + Flags: - (never set any flags) */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value); + +/* After the code generation the address for label, jump and const instructions + are computed. Since these structures are freed by sljit_free_compiler, the + addresses must be preserved by the user program elsewere. */ +static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { return label->addr; } +static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } +static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } + +/* Only the address is required to rewrite the code. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant); + +/* --------------------------------------------------------------------- */ +/* Miscellaneous utility functions */ +/* --------------------------------------------------------------------- */ + +#define SLJIT_MAJOR_VERSION 0 +#define SLJIT_MINOR_VERSION 93 + +/* Get the human readable name of the platform. Can be useful on platforms + like ARM, where ARM and Thumb2 functions can be mixed, and + it is useful to know the type of the code generator. */ +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void); + +/* Portable helper function to get an offset of a member. */ +#define SLJIT_OFFSETOF(base, member) ((sljit_sw)(&((base*)0x10)->member) - 0x10) + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) +/* This global lock is useful to compile common functions. */ +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void); +#endif + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +/* The sljit_stack is a utiliy feature of sljit, which allocates a + writable memory region between base (inclusive) and limit (exclusive). + Both base and limit is a pointer, and base is always <= than limit. + This feature uses the "address space reserve" feature + of modern operating systems. Basically we don't need to allocate a + huge memory block in one step for the worst case, we can start with + a smaller chunk and extend it later. Since the address space is + reserved, the data never copied to other regions, thus it is safe + to store pointers here. */ + +/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more). + Note: stack growing should not happen in small steps: 4k, 16k or even + bigger growth is better. + Note: this structure may not be supported by all operating systems. + Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK + is not defined. */ + +struct sljit_stack { + /* User data, anything can be stored here. + Starting with the same value as base. */ + sljit_uw top; + /* These members are read only. */ + sljit_uw base; + sljit_uw limit; + sljit_uw max_limit; +}; + +/* Returns NULL if unsuccessful. + Note: limit and max_limit contains the size for stack allocation. + Note: the top field is initialized to base. + Note: see sljit_create_compiler for the explanation of allocator_data. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data); +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data); + +/* Can be used to increase (allocate) or decrease (free) the memory area. + Returns with a non-zero value if unsuccessful. If new_limit is greater than + max_limit, it will fail. It is very easy to implement a stack data structure, + since the growth ratio can be added to the current limit, and sljit_stack_resize + will do all the necessary checks. The fields of the stack are not changed if + sljit_stack_resize fails. */ +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_uw new_limit); + +#endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ + +#if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + +/* Get the entry address of a given function. */ +#define SLJIT_FUNC_OFFSET(func_name) ((sljit_sw)func_name) + +#else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +/* All JIT related code should be placed in the same context (library, binary, etc.). */ + +#define SLJIT_FUNC_OFFSET(func_name) (*(sljit_sw*)(void*)func_name) + +/* For powerpc64, the function pointers point to a context descriptor. */ +struct sljit_function_context { + sljit_sw addr; + sljit_sw r2; + sljit_sw r11; +}; + +/* Fill the context arguments using the addr and the function. + If func_ptr is NULL, it will not be set to the address of context + If addr is NULL, the function address also comes from the func pointer. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func); + +#endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ + +#endif /* _SLJIT_LIR_H_ */ diff --git a/src/sljit/sljitNativeARM_32.c b/src/sljit/sljitNativeARM_32.c new file mode 100644 index 0000000..81f200d --- /dev/null +++ b/src/sljit/sljitNativeARM_32.c @@ -0,0 +1,2549 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return "ARMv7" SLJIT_CPUINFO; +#elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + return "ARMv5" SLJIT_CPUINFO; +#else +#error "Internal error: Unknown ARM architecture" +#endif +} + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (0) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +/* In ARM instruction words. + Cache lines are usually 32 byte aligned. */ +#define CONST_POOL_ALIGNMENT 8 +#define CONST_POOL_EMPTY 0xffffffff + +#define ALIGN_INSTRUCTION(ptr) \ + (sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1)) +#define MAX_DIFFERENCE(max_diff) \ + (((max_diff) / (sljit_si)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) + +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 +}; + +#define RM(rm) (reg_map[rm]) +#define RD(rd) (reg_map[rd] << 12) +#define RN(rn) (reg_map[rn] << 16) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +/* The instruction includes the AL condition. + INST_NAME - CONDITIONAL remove this flag. */ +#define COND_MASK 0xf0000000 +#define CONDITIONAL 0xe0000000 +#define PUSH_POOL 0xff000000 + +/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */ +#define ADC_DP 0x5 +#define ADD_DP 0x4 +#define AND_DP 0x0 +#define B 0xea000000 +#define BIC_DP 0xe +#define BL 0xeb000000 +#define BLX 0xe12fff30 +#define BX 0xe12fff10 +#define CLZ 0xe16f0f10 +#define CMP_DP 0xa +#define BKPT 0xe1200070 +#define EOR_DP 0x1 +#define MOV_DP 0xd +#define MUL 0xe0000090 +#define MVN_DP 0xf +#define NOP 0xe1a00000 +#define ORR_DP 0xc +#define PUSH 0xe92d0000 +#define POP 0xe8bd0000 +#define RSB_DP 0x3 +#define RSC_DP 0x7 +#define SBC_DP 0x6 +#define SMULL 0xe0c00090 +#define SUB_DP 0x2 +#define UMULL 0xe0800090 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) +/* Arm v7 specific instructions. */ +#define MOVW 0xe3000000 +#define MOVT 0xe3400000 +#define SXTB 0xe6af0070 +#define SXTH 0xe6bf0070 +#define UXTB 0xe6ef0070 +#define UXTH 0xe6ff0070 +#endif + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +static sljit_si push_cpool(struct sljit_compiler *compiler) +{ + /* Pushing the constant pool into the instruction stream. */ + sljit_uw* inst; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_si i; + + /* The label could point the address after the constant pool. */ + if (compiler->last_label && compiler->last_label->size == compiler->size) + compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1; + + SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE); + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0xff000000 | compiler->cpool_fill; + + for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = 0; + } + + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + while (cpool_ptr < cpool_end) { + inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!inst); + compiler->size++; + *inst = *cpool_ptr++; + } + compiler->cpool_diff = CONST_POOL_EMPTY; + compiler->cpool_fill = 0; + return SLJIT_SUCCESS; +} + +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static sljit_si push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +{ + sljit_uw* ptr; + sljit_uw cpool_index = CPOOL_SIZE; + sljit_uw* cpool_ptr; + sljit_uw* cpool_end; + sljit_ub* cpool_unique_ptr; + + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092))) + FAIL_IF(push_cpool(compiler)); + else if (compiler->cpool_fill > 0) { + cpool_ptr = compiler->cpool; + cpool_end = cpool_ptr + compiler->cpool_fill; + cpool_unique_ptr = compiler->cpool_unique; + do { + if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) { + cpool_index = cpool_ptr - compiler->cpool; + break; + } + cpool_ptr++; + cpool_unique_ptr++; + } while (cpool_ptr < cpool_end); + } + + if (cpool_index == CPOOL_SIZE) { + /* Must allocate a new entry in the literal pool. */ + if (compiler->cpool_fill < CPOOL_SIZE) { + cpool_index = compiler->cpool_fill; + compiler->cpool_fill++; + } + else { + FAIL_IF(push_cpool(compiler)); + cpool_index = 0; + compiler->cpool_fill = 1; + } + } + + SLJIT_ASSERT((inst & 0xfff) == 0); + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | cpool_index; + + compiler->cpool[cpool_index] = literal; + compiler->cpool_unique[cpool_index] = 0; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = compiler->size; + return SLJIT_SUCCESS; +} + +static sljit_si push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal) +{ + sljit_uw* ptr; + if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE)) + FAIL_IF(push_cpool(compiler)); + + SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0); + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst | compiler->cpool_fill; + + compiler->cpool[compiler->cpool_fill] = literal; + compiler->cpool_unique[compiler->cpool_fill] = 1; + compiler->cpool_fill++; + if (compiler->cpool_diff == CONST_POOL_EMPTY) + compiler->cpool_diff = compiler->size; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si prepare_blx(struct sljit_compiler *compiler) +{ + /* Place for at least two instruction (doesn't matter whether the first has a literal). */ + if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088))) + return push_cpool(compiler); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_blx(struct sljit_compiler *compiler) +{ + /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ + SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); + return push_inst(compiler, BLX | RM(TMP_REG1)); +} + +static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) +{ + sljit_uw diff; + sljit_uw ind; + sljit_uw counter = 0; + sljit_uw* clear_const_pool = const_pool; + sljit_uw* clear_const_pool_end = const_pool + cpool_size; + + SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT); + /* Set unused flag for all literals in the constant pool. + I.e.: unused literals can belong to branches, which can be encoded as B or BL. + We can "compress" the constant pool by discarding these literals. */ + while (clear_const_pool < clear_const_pool_end) + *clear_const_pool++ = (sljit_uw)(-1); + + while (last_pc_patch < code_ptr) { + /* Data transfer instruction with Rn == r15. */ + if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) { + diff = const_pool - last_pc_patch; + ind = (*last_pc_patch) & 0xfff; + + /* Must be a load instruction with immediate offset. */ + SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20))); + if ((sljit_si)const_pool[ind] < 0) { + const_pool[ind] = counter; + ind = counter; + counter++; + } + else + ind = const_pool[ind]; + + SLJIT_ASSERT(diff >= 1); + if (diff >= 2 || ind > 0) { + diff = (diff + ind - 2) << 2; + SLJIT_ASSERT(diff <= 0xfff); + *last_pc_patch = (*last_pc_patch & ~0xfff) | diff; + } + else + *last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004; + } + last_pc_patch++; + } + return counter; +} + +/* In some rare ocasions we may need future patches. The probability is close to 0 in practice. */ +struct future_patch { + struct future_patch* next; + sljit_si index; + sljit_si value; +}; + +static SLJIT_INLINE sljit_si resolve_const_pool_index(struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr) +{ + sljit_si value; + struct future_patch *curr_patch, *prev_patch; + + /* Using the values generated by patch_pc_relative_loads. */ + if (!*first_patch) + value = (sljit_si)cpool_start_address[cpool_current_index]; + else { + curr_patch = *first_patch; + prev_patch = 0; + while (1) { + if (!curr_patch) { + value = (sljit_si)cpool_start_address[cpool_current_index]; + break; + } + if ((sljit_uw)curr_patch->index == cpool_current_index) { + value = curr_patch->value; + if (prev_patch) + prev_patch->next = curr_patch->next; + else + *first_patch = curr_patch->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + break; + } + prev_patch = curr_patch; + curr_patch = curr_patch->next; + } + } + + if (value >= 0) { + if ((sljit_uw)value > cpool_current_index) { + curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data); + if (!curr_patch) { + while (*first_patch) { + curr_patch = *first_patch; + *first_patch = (*first_patch)->next; + SLJIT_FREE(curr_patch, compiler->allocator_data); + } + return SLJIT_ERR_ALLOC_FAILED; + } + curr_patch->next = *first_patch; + curr_patch->index = value; + curr_patch->value = cpool_start_address[value]; + *first_patch = curr_patch; + } + cpool_start_address[value] = *buf_ptr; + } + return SLJIT_SUCCESS; +} + +#else + +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_uw inst) +{ + sljit_uw* ptr; + + ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw)); + FAIL_IF(!ptr); + compiler->size++; + *ptr = inst; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_imm(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +{ + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); +} + +#endif + +static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + code_ptr--; + + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + + if (jump->flags & IS_BL) { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } + } + else { + if (diff <= 0x01ffffff && diff >= -0x02000000) { + *code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK); + jump->flags |= PATCH_B; + } + } +#else + if (jump->flags & JUMP_ADDR) + diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr); + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); + } + + /* Branch to Thumb code has not been optimized yet. */ + if (diff & 0x3) + return 0; + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + code_ptr -= 2; + *code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK); + jump->flags |= PATCH_B; + return 1; + } +#endif + return 0; +} + +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_si flush) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw mov_pc = ptr[1]; + sljit_si bl = (mov_pc & 0x0000f000) != RD(TMP_PC); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2); + + if (diff <= 0x7fffff && diff >= -0x800000) { + /* Turn to branch. */ + if (!bl) { + inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); + inst[1] = NOP; + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } else { + /* Get the position of the constant. */ + if (mov_pc & (1 << 23)) + ptr = inst + ((mov_pc & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != mov_pc) { + inst[0] = mov_pc; + if (!bl) { + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } else { + inst[1] = BLX | RM(TMP_REG1); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 2); + } + } + } + *ptr = new_addr; + } +#else + sljit_uw *inst = (sljit_uw*)addr; + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +static sljit_uw get_imm(sljit_uw imm); + +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_si flush) +{ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw *ptr = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw ldr_literal = ptr[1]; + sljit_uw src2; + + src2 = get_imm(new_constant); + if (src2) { + *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + src2 = get_imm(~new_constant); + if (src2) { + *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + return; + } + + if (ldr_literal & (1 << 23)) + ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2; + else + ptr = inst + 1; + + if (*inst != ldr_literal) { + *inst = ldr_literal; + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 1); + } + } + *ptr = new_constant; +#else + sljit_uw *inst = (sljit_uw*)addr; + SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); + inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); + inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); + if (flush) { + SLJIT_CACHE_FLUSH(inst, inst + 2); + } +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uw *code; + sljit_uw *code_ptr; + sljit_uw *buf_ptr; + sljit_uw *buf_end; + sljit_uw size; + sljit_uw word_count; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + sljit_uw cpool_size; + sljit_uw cpool_skip_alignment; + sljit_uw cpool_current_index; + sljit_uw *cpool_start_address; + sljit_uw *last_pc_patch; + struct future_patch *first_patch; +#endif + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. */ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + size = compiler->size + (compiler->patches << 1); + if (compiler->cpool_fill > 0) + size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1; +#else + size = compiler->size; +#endif + code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + cpool_size = 0; + cpool_skip_alignment = 0; + cpool_current_index = 0; + cpool_start_address = NULL; + first_patch = NULL; + last_pc_patch = code; +#endif + + code_ptr = code; + word_count = 0; + + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + if (label && label->size == 0) { + label->addr = (sljit_uw)code; + label->size = 0; + label = label->next; + } + + do { + buf_ptr = (sljit_uw*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + word_count++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (cpool_size > 0) { + if (cpool_skip_alignment > 0) { + buf_ptr++; + cpool_skip_alignment--; + } + else { + if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + if (++cpool_current_index >= cpool_size) { + SLJIT_ASSERT(!first_patch); + cpool_size = 0; + if (label && label->size == word_count) { + /* Points after the current instruction. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + } + } + } + else if ((*buf_ptr & 0xff000000) != PUSH_POOL) { +#endif + *code_ptr = *buf_ptr++; + /* These structures are ordered by their address. */ + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (detect_jump_type(jump, code_ptr, code)) + code_ptr--; + jump->addr = (sljit_uw)code_ptr; +#else + jump->addr = (sljit_uw)(code_ptr - 2); + if (detect_jump_type(jump, code_ptr, code)) + code_ptr -= 2; +#endif + jump = jump->next; + } + if (label && label->size == word_count) { + /* code_ptr can be affected above. */ + label->addr = (sljit_uw)(code_ptr + 1); + label->size = (code_ptr + 1) - code; + label = label->next; + } + if (const_ && const_->addr == word_count) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_->addr = (sljit_uw)code_ptr; +#else + const_->addr = (sljit_uw)(code_ptr - 1); +#endif + const_ = const_->next; + } + code_ptr++; +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + } + else { + /* Fortunately, no need to shift. */ + cpool_size = *buf_ptr++ & ~PUSH_POOL; + SLJIT_ASSERT(cpool_size > 0); + cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size); + if (cpool_current_index > 0) { + /* Unconditional branch. */ + *code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL); + code_ptr = cpool_start_address + cpool_current_index; + } + cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1; + cpool_current_index = 0; + last_pc_patch = code_ptr; + } +#endif + } while (buf_ptr < buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + SLJIT_ASSERT(cpool_size == 0); + if (compiler->cpool_fill > 0) { + cpool_start_address = ALIGN_INSTRUCTION(code_ptr); + cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill); + if (cpool_current_index > 0) + code_ptr = cpool_start_address + cpool_current_index; + + buf_ptr = compiler->cpool; + buf_end = buf_ptr + compiler->cpool_fill; + cpool_current_index = 0; + while (buf_ptr < buf_end) { + if (SLJIT_UNLIKELY(resolve_const_pool_index(&first_patch, cpool_current_index, cpool_start_address, buf_ptr))) { + SLJIT_FREE_EXEC(code); + compiler->error = SLJIT_ERR_ALLOC_FAILED; + return NULL; + } + buf_ptr++; + cpool_current_index++; + } + SLJIT_ASSERT(!first_patch); + } +#endif + + jump = compiler->jumps; + while (jump) { + buf_ptr = (sljit_uw*)jump->addr; + + if (jump->flags & PATCH_B) { + if (!(jump->flags & JUMP_ADDR)) { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + } + else { + SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + } + } + else if (jump->flags & SLJIT_REWRITABLE_JUMP) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + jump->addr = (sljit_uw)code_ptr; + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + code_ptr += 2; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + else { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (jump->flags & IS_BL) + buf_ptr--; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; +#else + inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); +#endif + } + jump = jump->next; + } + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + const_ = compiler->consts; + while (const_) { + buf_ptr = (sljit_uw*)const_->addr; + const_->addr = (sljit_uw)code_ptr; + + code_ptr[0] = (sljit_uw)buf_ptr; + code_ptr[1] = *buf_ptr; + if (*buf_ptr & (1 << 23)) + buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2; + else + buf_ptr += 1; + /* Set the value again (can be a simple constant). */ + inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0); + code_ptr += 2; + + const_ = const_->next; + } +#endif + + SLJIT_ASSERT(code_ptr - code <= (sljit_si)size); + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + SLJIT_CACHE_FLUSH(code, code_ptr); + return code; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* emit_op inp_flags. + WRITE_BACK must be the first, since it is a flag. */ +#define WRITE_BACK 0x01 +#define ALLOW_IMM 0x02 +#define ALLOW_INV_IMM 0x04 +#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) +#define ARG_TEST 0x08 + +/* Creates an index in data_transfer_insts array. */ +#define WORD_DATA 0x00 +#define BYTE_DATA 0x10 +#define HALF_DATA 0x20 +#define SIGNED_DATA 0x40 +#define LOAD_DATA 0x80 + +/* Condition: AL. */ +#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ + (0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2)) + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si size, i, tmp; + sljit_uw push; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + /* Push saved registers, temporary registers + stmdb sp!, {..., lr} */ + push = PUSH | (1 << 14); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF(push_inst(compiler, push)); + + /* Stack must be aligned to 8 bytes: */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + local_size = ((size + local_size + 7) & ~7) - size; + compiler->local_size = local_size; + if (local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size)); + + if (args >= 1) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0)))); + if (args >= 2) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1)))); + if (args >= 3) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2)))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((size + local_size + 7) & ~7) - size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si i, tmp; + sljit_uw pop; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + if (compiler->local_size > 0) + FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + + /* Push saved registers, temporary registers + ldmia sp!, {..., pc} */ + pop = POP | (1 << 15); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return push_inst(compiler, pop); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* s/l - store/load (1 bit) + u/s - signed/unsigned (1 bit) + w/b/h/N - word/byte/half/NOT allowed (2 bit) + It contans 16 items, but not all are different. */ + +static sljit_sw data_transfer_insts[16] = { +/* s u w */ 0xe5000000 /* str */, +/* s u b */ 0xe5400000 /* strb */, +/* s u h */ 0xe10000b0 /* strh */, +/* s u N */ 0x00000000 /* not allowed */, +/* s s w */ 0xe5000000 /* str */, +/* s s b */ 0xe5400000 /* strb */, +/* s s h */ 0xe10000b0 /* strh */, +/* s s N */ 0x00000000 /* not allowed */, + +/* l u w */ 0xe5100000 /* ldr */, +/* l u b */ 0xe5500000 /* ldrb */, +/* l u h */ 0xe11000b0 /* ldrh */, +/* l u N */ 0x00000000 /* not allowed */, +/* l s w */ 0xe5100000 /* ldr */, +/* l s b */ 0xe11000d0 /* ldrsb */, +/* l s h */ 0xe11000f0 /* ldrsh */, +/* l s N */ 0x00000000 /* not allowed */, +}; + +#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \ + (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2)) +/* Normal ldr/str instruction. + Type2: ldrsb, ldrh, ldrsh */ +#define IS_TYPE1_TRANSFER(type) \ + (data_transfer_insts[(type) >> 4] & 0x04000000) +#define TYPE2_TRANSFER_IMM(imm) \ + (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) + +/* flags: */ + /* Arguments are swapped. */ +#define ARGS_SWAPPED 0x01 + /* Inverted immediate. */ +#define INV_IMM 0x02 + /* Source and destination is register. */ +#define REG_DEST 0x04 +#define REG_SOURCE 0x08 + /* One instruction is enough. */ +#define FAST_DEST 0x10 + /* Multiple instructions are required. */ +#define SLOW_DEST 0x20 +/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS (1 << 20) +/* dst: reg + src1: reg + src2: reg or imm (if allowed) + SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ +#define SRC2_IMM (1 << 25) + +#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))) + +#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2)) + +#define EMIT_SHIFT_INS_AND_RETURN(opcode) \ + SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ + if (compiler->shift_imm != 0x20) { \ + SLJIT_ASSERT(src1 == TMP_REG1); \ + SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ + if (compiler->shift_imm != 0) \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \ + } \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1]))); + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_si src2) +{ + sljit_sw mul_inst; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if (dst != src2) { + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (op == SLJIT_MOV_UB) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_UB ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UB ? UXTB : SXTB) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); + if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_UH ? 0x20 : 0x40) | reg_map[dst])); +#else + return push_inst(compiler, (op == SLJIT_MOV_UH ? UXTH : SXTH) | RD(dst) | RM(src2)); +#endif + } + else if (dst != src2) { + SLJIT_ASSERT(src2 & SRC2_IMM); + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + if (src2 & SRC2_IMM) { + if (flags & INV_IMM) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + } + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); + if (flags & SET_FLAGS) + EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); + return SLJIT_SUCCESS; + + case SLJIT_ADD: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP); + + case SLJIT_ADDC: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP); + + case SLJIT_SUB: + SLJIT_ASSERT(!(flags & INV_IMM)); + if (!(flags & ARGS_SWAPPED)) + EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP); + EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP); + + case SLJIT_SUBC: + SLJIT_ASSERT(!(flags & INV_IMM)); + if (!(flags & ARGS_SWAPPED)) + EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP); + EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP); + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & INV_IMM)); + SLJIT_ASSERT(!(src2 & SRC2_IMM)); + if (SLJIT_UNLIKELY(op & SLJIT_SET_O)) + mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12); + else + mul_inst = MUL | (reg_map[dst] << 16); + + if (dst != src2) + FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2])); + else if (dst != src1) + FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1])); + else { + /* Rm and Rd must not be the same register. */ + SLJIT_ASSERT(dst != TMP_REG1); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2]))); + FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1])); + } + + if (!(op & SLJIT_SET_O)) + return SLJIT_SUCCESS; + + /* We need to use TMP_REG3. */ + compiler->cache_arg = 0; + compiler->cache_argw = 0; + /* cmp TMP_REG2, dst asr #31. */ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0)); + + case SLJIT_AND: + if (!(flags & INV_IMM)) + EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP); + EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP); + + case SLJIT_OR: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP); + + case SLJIT_XOR: + SLJIT_ASSERT(!(flags & INV_IMM)); + EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP); + + case SLJIT_SHL: + EMIT_SHIFT_INS_AND_RETURN(0); + + case SLJIT_LSHR: + EMIT_SHIFT_INS_AND_RETURN(1); + + case SLJIT_ASHR: + EMIT_SHIFT_INS_AND_RETURN(2); + } + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +#undef EMIT_DATA_PROCESS_INS_AND_RETURN +#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN +#undef EMIT_SHIFT_INS_AND_RETURN + +/* Tests whether the immediate can be stored in the 12 bit imm field. + Returns with 0 if not possible. */ +static sljit_uw get_imm(sljit_uw imm) +{ + sljit_si rol; + + if (imm <= 0xff) + return SRC2_IMM | imm; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol = 8; + } + else { + imm = (imm << 24) | (imm >> 8); + rol = 0; + } + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + return SRC2_IMM | (imm >> 24) | (rol << 8); + else + return 0; +} + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) +static sljit_si generate_int(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm, sljit_si positive) +{ + sljit_uw mask; + sljit_uw imm1; + sljit_uw imm2; + sljit_si rol; + + /* Step1: Search a zero byte (8 continous zero bit). */ + mask = 0xff000000; + rol = 8; + while(1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = 4 + (rol >> 1); + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) { + /* rol by 8. */ + imm = (imm << 8) | (imm >> 24); + mask = 0xff00; + rol = 24; + while (1) { + if (!(imm & mask)) { + /* Rol imm by rol. */ + imm = (imm << rol) | (imm >> (32 - rol)); + /* Calculate arm rol. */ + rol = (rol >> 1) - 8; + break; + } + rol += 2; + mask >>= 2; + if (mask & 0x3) + return 0; + } + break; + } + } + + /* The low 8 bit must be zero. */ + SLJIT_ASSERT(!(imm & 0xff)); + + if (!(imm & 0xff000000)) { + imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8); + imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8); + } + else if (imm & 0xc0000000) { + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xff000000)) { + imm <<= 8; + rol += 4; + } + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + else { + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8); + imm <<= 8; + rol += 4; + + if (!(imm & 0xf0000000)) { + imm <<= 4; + rol += 2; + } + + if (!(imm & 0xc0000000)) { + imm <<= 2; + rol += 1; + } + + if (!(imm & 0x00ffffff)) + imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8); + else + return 0; + } + + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1))); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2))); + return 1; +} +#endif + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_uw imm) +{ + sljit_uw tmp; + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + if (!(imm & ~0xffff)) + return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)); +#endif + + /* Create imm by 1 inst. */ + tmp = get_imm(imm); + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp)); + + tmp = get_imm(~imm); + if (tmp) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp)); + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + /* Create imm by 2 inst. */ + FAIL_IF(generate_int(compiler, reg, imm, 1)); + FAIL_IF(generate_int(compiler, reg, ~imm, 0)); + + /* Load integer. */ + return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm); +#else + return emit_imm(compiler, reg, imm); +#endif +} + +/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ +static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +{ + if (value >= 0) { + value = get_imm(value); + if (value) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value)); + } + else { + value = get_imm(-value); + if (value) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value)); + } + return SLJIT_ERR_UNSUPPORTED; +} + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_uw imm; + + if (arg & SLJIT_IMM) { + imm = get_imm(argw); + if (imm) { + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm))); + return -1; + } + imm = get_imm(~argw); + if (imm) { + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm))); + return -1; + } + return 0; + } + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads/stores. */ + if (!(arg & REG_MASK)) + return 0; + + if (arg & OFFS_REG_MASK) { + if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags)) + return 0; + + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, + RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)))); + return -1; + } + + if (IS_TYPE1_TRANSFER(inp_flags)) { + if (argw >= 0 && argw <= 0xfff) { + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw))); + return -1; + } + if (argw < 0 && argw >= -0xfff) { + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw))); + return -1; + } + } + else { + if (argw >= 0 && argw <= 0xff) { + if (inp_flags & ARG_TEST) + return 1; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); + return -1; + } + if (argw < 0 && argw >= -0xff) { + if (inp_flags & ARG_TEST) + return 1; + argw = -argw; + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); + return -1; + } + } + + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + /* Immediate caching is not supported as it would be an operation on constant arguments. */ + if (arg & SLJIT_IMM) + return 0; + + /* Always a simple operation. */ + if (arg & OFFS_REG_MASK) + return 0; + + if (!(arg & REG_MASK)) { + /* Immediate access. */ + if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) + return 1; + return 0; + } + + if (argw <= 0xfffff && argw >= -0xfffff) + return 0; + + if (argw == next_argw && (next_arg & SLJIT_MEM)) + return 1; + + if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) + return 1; + + return 0; +} + +#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \ + if (max_delta & 0xf00) \ + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \ + else \ + FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm)))); + +#define TEST_WRITE_BACK() \ + if (inp_flags & WRITE_BACK) { \ + tmp_r = arg & REG_MASK; \ + if (reg == tmp_r) { \ + /* This can only happen for stores */ \ + /* since ldr reg, [reg, ...]! has no meaning */ \ + SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \ + reg = TMP_REG3; \ + } \ + } + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si tmp_r; + sljit_sw max_delta; + sljit_sw sign; + sljit_uw imm; + + if (arg & SLJIT_IMM) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + return load_immediate(compiler, reg, argw); + } + + SLJIT_ASSERT(arg & SLJIT_MEM); + + tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; + max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff; + + if ((arg & REG_MASK) == SLJIT_UNUSED) { + /* Write back is not used. */ + imm = (sljit_uw)(argw - compiler->cache_argw); + if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { + if (imm <= (sljit_uw)max_delta) { + sign = 1; + argw = argw - compiler->cache_argw; + } + else { + sign = 0; + argw = compiler->cache_argw - argw; + } + + GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw); + return SLJIT_SUCCESS; + } + + /* With write back, we can create some sophisticated loads, but + it is hard to decide whether we should convert downward (0s) or upward (1s). */ + imm = (sljit_uw)(argw - next_argw); + if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0); + return SLJIT_SUCCESS; + } + + if (arg & OFFS_REG_MASK) { + SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); + if (inp_flags & WRITE_BACK) + tmp_r = arg & REG_MASK; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); + } + + imm = (sljit_uw)(argw - compiler->cache_argw); + if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm); + return SLJIT_SUCCESS; + } + if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); + imm = (sljit_uw)-(sljit_sw)imm; + GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm); + return SLJIT_SUCCESS; + } + + imm = get_imm(argw & ~max_delta); + if (imm) { + TEST_WRITE_BACK(); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm))); + GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); + return SLJIT_SUCCESS; + } + + imm = get_imm(-argw & ~max_delta); + if (imm) { + argw = -argw; + TEST_WRITE_BACK(); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm))); + GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); + return SLJIT_SUCCESS; + } + + if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { + TEST_WRITE_BACK(); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); + } + + if (argw == next_argw && (next_arg & SLJIT_MEM)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + + TEST_WRITE_BACK(); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); + } + + imm = (sljit_uw)(argw - next_argw); + if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]))); + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0); + return SLJIT_SUCCESS; + } + + if ((arg & REG_MASK) == tmp_r) { + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si inp_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + + /* We prefers register and simple consts. */ + sljit_si dst_r; + sljit_si src1_r; + sljit_si src2_r = 0; + sljit_si sugg_src2_r = TMP_REG2; + sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + /* Destination check. */ + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + dst_r = TMP_REG2; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else { + SLJIT_ASSERT(dst & SLJIT_MEM); + if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { + flags |= FAST_DEST; + dst_r = TMP_REG2; + } + else { + flags |= SLOW_DEST; + dst_r = 0; + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_r = src1; + else if (FAST_IS_REG(src2)) { + flags |= ARGS_SWAPPED; + src1_r = src2; + src2 = src1; + src2w = src1w; + } + else do { /* do { } while(0) is used because of breaks. */ + src1_r = 0; + if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) { + /* The second check will generate a hit. */ + src2_r = get_imm(src1w); + if (src2_r) { + flags |= ARGS_SWAPPED; + src1 = src2; + src1w = src2w; + break; + } + if (inp_flags & ALLOW_INV_IMM) { + src2_r = get_imm(~src1w); + if (src2_r) { + flags |= ARGS_SWAPPED | INV_IMM; + src1 = src2; + src1w = src2w; + break; + } + } + if (GET_OPCODE(op) == SLJIT_ADD) { + src2_r = get_imm(-src1w); + if (src2_r) { + /* Note: ARGS_SWAPPED is intentionally not applied! */ + src1 = src2; + src1w = src2w; + op = SLJIT_SUB | GET_ALL_FLAGS(op); + break; + } + } + } + + if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1_r = TMP_REG1; + } + } while (0); + + /* Source 2. */ + if (src2_r == 0) { + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else do { /* do { } while(0) is used because of breaks. */ + if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) { + src2_r = get_imm(src2w); + if (src2_r) + break; + if (inp_flags & ALLOW_INV_IMM) { + src2_r = get_imm(~src2w); + if (src2_r) { + flags |= INV_IMM; + break; + } + } + if (GET_OPCODE(op) == SLJIT_ADD) { + src2_r = get_imm(-src2w); + if (src2_r) { + op = SLJIT_SUB | GET_ALL_FLAGS(op); + flags &= ~ARGS_SWAPPED; + break; + } + } + if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) { + src2_r = get_imm(-src2w); + if (src2_r) { + op = SLJIT_ADD | GET_ALL_FLAGS(op); + flags &= ~ARGS_SWAPPED; + break; + } + } + } + + /* src2_r is 0. */ + if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { + FAIL_IF(compiler->error); + src2_r = sugg_src2_r; + } + } while (0); + } + + /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero. + If they are zero, they must not be registers. */ + if (src1_r == 0 && src2_r == 0 && dst_r == 0) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); + flags |= ARGS_SWAPPED; + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + src1_r = TMP_REG1; + src2_r = TMP_REG2; + } + else if (src1_r == 0 && src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + src1_r = TMP_REG1; + } + else if (src1_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + src1_r = TMP_REG1; + } + else if (src2_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + src2_r = sugg_src2_r; + } + + if (dst_r == 0) + dst_r = TMP_REG2; + + if (src1_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); + src1_r = TMP_REG1; + } + + if (src2_r == 0) { + FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (flags & (FAST_DEST | SLOW_DEST)) { + if (flags & FAST_DEST) + FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); + else + FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); + } + return SLJIT_SUCCESS; +} + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + FAIL_IF(push_inst(compiler, BKPT)); + break; + case SLJIT_NOP: + FAIL_IF(push_inst(compiler, NOP)); + break; + case SLJIT_LUMUL: + case SLJIT_LSMUL: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[SLJIT_R1]); +#else + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + | (reg_map[SLJIT_R1] << 16) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 8) + | reg_map[TMP_REG1]); +#endif + case SLJIT_LUDIV: + case SLJIT_LSDIV: + if (compiler->scratches >= 3) + FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */)); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + if (compiler->scratches >= 3) + return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + case SLJIT_MOVU_P: + return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_SUB: + case SLJIT_SUBC: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (src2 & SLJIT_IMM) { + compiler->shift_imm = src2w & 0x1f; + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w); + } + else { + compiler->shift_imm = 0x20; + return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w); + } + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_uw*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + +/* 0 - no fpu + 1 - vfp */ +static sljit_si arm_fpu_type = -1; + +static void init_compiler(void) +{ + if (arm_fpu_type != -1) + return; + + /* TODO: Only the OS can help to determine the correct fpu type. */ + arm_fpu_type = 1; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + if (arm_fpu_type == -1) + init_compiler(); + return arm_fpu_type; +#endif +} + +#else + +#define arm_fpu_type 1 + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ + /* Always available. */ + return 1; +} + +#endif + +#define FPU_LOAD (1 << 20) +#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ + ((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs)) +#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \ + ((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16)) + +static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_sw tmp; + sljit_uw imm; + sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + arg = SLJIT_MEM | TMP_REG1; + argw = 0; + } + + /* Fast loads and stores. */ + if ((arg & REG_MASK)) { + if (!(argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); + } + + if (compiler->cache_arg == arg) { + tmp = argw - compiler->cache_argw; + if (!(tmp & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2)); + if (!(-tmp & ~0x3fc)) + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); + } + } + + if (arg & REG_MASK) { + if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); + } + imm = get_imm(argw & ~0x3fc); + if (imm) { + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm))); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + } + imm = get_imm(-argw & ~0x3fc); + if (imm) { + argw = -argw; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm))); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + } + } + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + if (arg & REG_MASK) { + FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]))); + } + else + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_SINGLE_OP, TMP_FREG1, src, 0))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16))); + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_SINGLE_OP, dst_r, TMP_FREG1, 0))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_SINGLE_OP, src1, src2, 0))); + return push_inst(compiler, VMRS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_SINGLE_OP, dst_r, src, 0))); + op ^= SLJIT_SINGLE_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w)); + src2 = TMP_FREG2; + } + + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + break; + + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + break; + + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + break; + + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_SINGLE_OP, dst_r, src2, src1))); + break; + } + + if (dst_r == TMP_FREG1) + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw)); + + return SLJIT_SUCCESS; +} + +#undef FPU_LOAD +#undef EMIT_FPU_DATA_TRANSFER +#undef EMIT_FPU_OPERATION + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3))); + + /* Memory. */ + if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) + return compiler->error; + /* TMP_REG3 is used for caching. */ + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)))); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)))); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) + FAIL_IF(compiler->error); + else { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)))); + } + } + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); + return push_inst(compiler, BLX | RM(TMP_REG3)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: + return 0x00000000; + + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: + return 0x10000000; + + case SLJIT_LESS: + case SLJIT_D_LESS: + return 0x30000000; + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + return 0x20000000; + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + return 0x80000000; + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + return 0x90000000; + + case SLJIT_SIG_LESS: + return 0xb0000000; + + case SLJIT_SIG_GREATER_EQUAL: + return 0xa0000000; + + case SLJIT_SIG_GREATER: + return 0xc0000000; + + case SLJIT_SIG_LESS_EQUAL: + return 0xd0000000; + + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: + return 0x60000000; + + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: + return 0x70000000; + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + return 0xe0000000; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In ARM, we don't need to touch the arguments. */ +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + PTR_FAIL_IF(prepare_blx(compiler)); + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, + type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->addr = compiler->size; + compiler->patches++; + } + + if (type >= SLJIT_FAST_CALL) { + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_blx(compiler)); + } + + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + jump->addr = compiler->size; +#else + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); + jump->addr = compiler->size; +#endif + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + /* In ARM, we don't need to touch the arguments. */ + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); + + SLJIT_ASSERT(src & SLJIT_MEM); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); + return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)); + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + if (type >= SLJIT_FAST_CALL) + FAIL_IF(prepare_blx(compiler)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + if (type >= SLJIT_FAST_CALL) + FAIL_IF(emit_blx(compiler)); +#else + FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); +#endif + jump->addr = compiler->size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si dst_r, flags = GET_ALL_FLAGS(op); + sljit_uw cc, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); + cc = get_cc(type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0))); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); + return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; + } + + ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); + if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc)); + /* The condition must always be set, even if the ORR/EOR is not executed above. */ + return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; + } + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } else if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc)); + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + if (dst_r == TMP_REG2) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0)); + + return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si reg; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + + reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + +#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) + PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value)); + compiler->patches++; +#else + PTR_FAIL_IF(emit_imm(compiler, reg, init_value)); +#endif + set_const(const_, compiler); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + inline_set_jump_addr(addr, new_addr, 1); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + inline_set_const(addr, new_constant, 1); +} diff --git a/src/sljit/sljitNativeARM_64.c b/src/sljit/sljitNativeARM_64.c new file mode 100644 index 0000000..c5251be --- /dev/null +++ b/src/sljit/sljitNativeARM_64.c @@ -0,0 +1,2019 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "ARM-64" SLJIT_CPUINFO; +} + +/* Length of an instruction word */ +typedef sljit_ui sljit_ins; + +#define TMP_ZERO (0) + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 6) + +#define TMP_FREG1 (0) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { + 31, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 30, 31 +}; + +#define W_OP (1 << 31) +#define RD(rd) (reg_map[rd]) +#define RT(rt) (reg_map[rt]) +#define RN(rn) (reg_map[rn] << 5) +#define RT2(rt2) (reg_map[rt2] << 10) +#define RM(rm) (reg_map[rm] << 16) +#define VD(vd) (vd) +#define VT(vt) (vt) +#define VN(vn) ((vn) << 5) +#define VM(vm) ((vm) << 16) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define ADC 0x9a000000 +#define ADD 0x8b000000 +#define ADDI 0x91000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CBZ 0xb4000000 +#define CLZ 0xdac01000 +#define CSINC 0x9a800400 +#define EOR 0xca000000 +#define EORI 0xd2000000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCVT 0x1e224000 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 +#define FMOV 0x1e604000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define LDRI 0xf9400000 +#define LDP 0xa9400000 +#define LDP_PST 0xa8c00000 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD 0x9b000000 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORRI 0xb2000000 +#define RET 0xd65f0000 +#define SBC 0xda000000 +#define SBFM 0x93000000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define SMADDL 0x9b200000 +#define SMULH 0x9b403c00 +#define STP 0xa9000000 +#define STP_PRE 0xa9800000 +#define STRI 0xf9000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define UBFM 0xd3000000 +#define UDIV 0x9ac00800 +#define UMULH 0x9bc03c00 + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +{ + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21))); + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21))); + return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21)); +} + +static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) +{ + sljit_si dst = inst[0] & 0x1f; + SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21))); + inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5); + inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21); + inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21); + inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); +} + +static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_sw diff; + sljit_uw target_addr; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) { + jump->flags |= PATCH_ABS64; + return 0; + } + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4); + + if (jump->flags & IS_COND) { + diff += sizeof(sljit_ins); + if (diff <= 0xfffff && diff >= -0x100000) { + code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1; + jump->addr -= sizeof(sljit_ins); + jump->flags |= PATCH_COND; + return 5; + } + diff -= sizeof(sljit_ins); + } + + if (diff <= 0x7ffffff && diff >= -0x8000000) { + jump->flags |= PATCH_B; + return 4; + } + + if (target_addr <= 0xffffffffl) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (2 << 5); + code_ptr[-2] = code_ptr[0]; + return 2; + } + if (target_addr <= 0xffffffffffffl) { + if (jump->flags & IS_COND) + code_ptr[-5] -= (1 << 5); + jump->flags |= PATCH_ABS48; + code_ptr[-1] = code_ptr[0]; + return 1; + } + + jump->flags |= PATCH_ABS64; + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + sljit_si dst; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + /* These structures are ordered by their address. */ + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { + jump->addr = (sljit_uw)(code_ptr - 4); + code_ptr -= detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); + buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); + if (jump->flags & IS_COND) + buf_ptr[-1] -= (4 << 5); + break; + } + if (jump->flags & PATCH_COND) { + addr = (sljit_sw)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); + buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); + break; + } + + SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl); + SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl); + + dst = buf_ptr[0] & 0x1f; + buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5); + buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21); + if (jump->flags & (PATCH_ABS48 | PATCH_ABS64)) + buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21); + if (jump->flags & PATCH_ABS64) + buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21); + } while (0); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + SLJIT_CACHE_FLUSH(code, code_ptr); + return code; +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define COUNT_TRAILING_ZERO(value, result) \ + result = 0; \ + if (!(value & 0xffffffff)) { \ + result += 32; \ + value >>= 32; \ + } \ + if (!(value & 0xffff)) { \ + result += 16; \ + value >>= 16; \ + } \ + if (!(value & 0xff)) { \ + result += 8; \ + value >>= 8; \ + } \ + if (!(value & 0xf)) { \ + result += 4; \ + value >>= 4; \ + } \ + if (!(value & 0x3)) { \ + result += 2; \ + value >>= 2; \ + } \ + if (!(value & 0x1)) { \ + result += 1; \ + value >>= 1; \ + } + +#define LOGICAL_IMM_CHECK 0x100 + +static sljit_ins logical_imm(sljit_sw imm, sljit_si len) +{ + sljit_si negated, ones, right; + sljit_uw mask, uimm; + sljit_ins ins; + + if (len & LOGICAL_IMM_CHECK) { + len &= ~LOGICAL_IMM_CHECK; + if (len == 32 && (imm == 0 || imm == -1)) + return 0; + if (len == 16 && ((sljit_si)imm == 0 || (sljit_si)imm == -1)) + return 0; + } + + SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1) + || (len == 16 && (sljit_si)imm != 0 && (sljit_si)imm != -1)); + uimm = (sljit_uw)imm; + while (1) { + if (len <= 0) { + SLJIT_ASSERT_STOP(); + return 0; + } + mask = ((sljit_uw)1 << len) - 1; + if ((uimm & mask) != ((uimm >> len) & mask)) + break; + len >>= 1; + } + + len <<= 1; + + negated = 0; + if (uimm & 0x1) { + negated = 1; + uimm = ~uimm; + } + + if (len < 64) + uimm &= ((sljit_uw)1 << len) - 1; + + /* Unsigned right shift. */ + COUNT_TRAILING_ZERO(uimm, right); + + /* Signed shift. We also know that the highest bit is set. */ + imm = (sljit_sw)~uimm; + SLJIT_ASSERT(imm < 0); + + COUNT_TRAILING_ZERO(imm, ones); + + if (~imm) + return 0; + + if (len == 64) + ins = 1 << 22; + else + ins = (0x3f - ((len << 1) - 1)) << 10; + + if (negated) + return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16); + + return ins | ((ones - 1) << 10) | ((len - right) << 16); +} + +#undef COUNT_TRAILING_ZERO + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw simm) +{ + sljit_uw imm = (sljit_uw)simm; + sljit_si i, zeros, ones, first; + sljit_ins bitmask; + + if (imm <= 0xffff) + return push_inst(compiler, MOVZ | RD(dst) | (imm << 5)); + + if (simm >= -0x10000 && simm < 0) + return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)); + + if (imm <= 0xffffffffl) { + if ((imm & 0xffff0000l) == 0xffff0000) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5)); + if ((imm & 0xffff) == 0xffff) + return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + bitmask = logical_imm(simm, 16); + if (bitmask != 0) + return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask); + } + else { + bitmask = logical_imm(simm, 32); + if (bitmask != 0) + return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask); + } + + if (imm <= 0xffffffffl) { + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + } + + if (simm >= -0x100000000l && simm < 0) { + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5))); + return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21)); + } + + /* A large amount of number can be constructed from ORR and MOVx, + but computing them is costly. We don't */ + + zeros = 0; + ones = 0; + for (i = 4; i > 0; i--) { + if ((simm & 0xffff) == 0) + zeros++; + if ((simm & 0xffff) == 0xffff) + ones++; + simm >>= 16; + } + + simm = (sljit_sw)imm; + first = 1; + if (ones > zeros) { + simm = ~simm; + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; + } + + for (i = 0; i < 4; i++) { + if (!(simm & 0xffff)) { + simm >>= 16; + continue; + } + if (first) { + first = 0; + FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + } + else + FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21))); + simm >>= 16; + } + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +#define INT_OP 0x0040000 +#define SET_FLAGS 0x0080000 +#define UNUSED_RETURN 0x0100000 +#define SLOW_DEST 0x0200000 +#define SLOW_SRC1 0x0400000 +#define SLOW_SRC2 0x0800000 + +#define CHECK_FLAGS(flag_bits) \ + if (flags & SET_FLAGS) { \ + inv_bits |= flag_bits; \ + if (flags & UNUSED_RETURN) \ + dst = TMP_ZERO; \ + } + +static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_sw arg1, sljit_sw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, TMP_REG1, imm + arg2 must be register, TMP_REG2, imm */ + sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0; + sljit_ins inst_bits; + sljit_si op = (flags & 0xffff); + sljit_si reg; + sljit_sw imm, nimm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates. */ + flags &= ~ARG1_IMM; + if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (flags & ARG2_IMM) ? arg1 : arg2; + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (op) { + case SLJIT_MUL: + case SLJIT_NEG: + case SLJIT_CLZ: + case SLJIT_ADDC: + case SLJIT_SUBC: + /* No form with immediate operand (except imm 0, which + is represented by a ZERO register). */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + SLJIT_ASSERT(flags & ARG2_IMM); + FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); + goto set_flags; + case SLJIT_SUB: + if (flags & ARG1_IMM) + break; + imm = -imm; + /* Fall through. */ + case SLJIT_ADD: + if (imm == 0) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg)); + } + if (imm > 0 && imm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10)); + } + nimm = -imm; + if (nimm > 0 && nimm <= 0xfff) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10)); + } + if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)); + } + if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) { + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)); + } + if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10)); + } + if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) { + FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22))); + return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10)); + } + break; + case SLJIT_AND: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); + if (!inst_bits) + break; + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); + case SLJIT_OR: + case SLJIT_XOR: + inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); + if (!inst_bits) + break; + if (op == SLJIT_OR) + inst_bits |= ORRI; + else + inst_bits |= EORI; + FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); + goto set_flags; + case SLJIT_SHL: + if (flags & ARG1_IMM) + break; + if (flags & INT_OP) { + imm &= 0x1f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10))); + } + else { + imm &= 0x3f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10))); + } + goto set_flags; + case SLJIT_LSHR: + case SLJIT_ASHR: + if (flags & ARG1_IMM) + break; + if (op == SLJIT_ASHR) + inv_bits |= 1 << 30; + if (flags & INT_OP) { + imm &= 0x1f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10))); + } + else { + imm &= 0x3f; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10))); + } + goto set_flags; + default: + SLJIT_ASSERT_STOP(); + break; + } + + if (flags & ARG2_IMM) { + if (arg2 == 0) + arg2 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + } + else { + if (arg1 == 0) + arg1 = TMP_ZERO; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + } + + /* Both arguments are registers. */ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + case SLJIT_MOVU: + case SLJIT_MOVU_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_UB: + case SLJIT_MOVU_UB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_SB: + case SLJIT_MOVU_SB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); + case SLJIT_MOV_UH: + case SLJIT_MOVU_UH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_SH: + case SLJIT_MOVU_SH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (!(flags & INT_OP)) + inv_bits |= 1 << 22; + return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); + case SLJIT_MOV_UI: + case SLJIT_MOVU_UI: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if ((flags & INT_OP) && dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_MOV_SI: + case SLJIT_MOVU_SI: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if ((flags & INT_OP) && dst == arg2) + return SLJIT_SUCCESS; + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); + goto set_flags; + case SLJIT_NEG: + SLJIT_ASSERT(arg1 == TMP_REG1); + if (flags & SET_FLAGS) + inv_bits |= 1 << 29; + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2))); + goto set_flags; + case SLJIT_ADD: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_ADDC: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUB: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_SUBC: + CHECK_FLAGS(1 << 29); + return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_MUL: + if (!(flags & SET_FLAGS)) + return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)); + if (flags & INT_OP) { + FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10))); + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + } + FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2))); + FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO))); + return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10)); + case SLJIT_AND: + CHECK_FLAGS(3 << 29); + return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); + case SLJIT_OR: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + goto set_flags; + case SLJIT_XOR: + FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + goto set_flags; + case SLJIT_SHL: + FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + goto set_flags; + case SLJIT_LSHR: + FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + goto set_flags; + case SLJIT_ASHR: + FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); + goto set_flags; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + +set_flags: + if (flags & SET_FLAGS) + return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO)); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define UPDATE 0x04 +#define ARG_TEST 0x08 + +#define BYTE_SIZE 0x000 +#define HALF_SIZE 0x100 +#define INT_SIZE 0x200 +#define WORD_SIZE 0x300 + +#define MEM_SIZE_SHIFT(flags) ((flags) >> 8) + +static SLJIT_CONST sljit_ins sljit_mem_imm[4] = { +/* u l */ 0x39400000 /* ldrb [reg,imm] */, +/* u s */ 0x39000000 /* strb [reg,imm] */, +/* s l */ 0x39800000 /* ldrsb [reg,imm] */, +/* s s */ 0x39000000 /* strb [reg,imm] */, +}; + +static SLJIT_CONST sljit_ins sljit_mem_simm[4] = { +/* u l */ 0x38400000 /* ldurb [reg,imm] */, +/* u s */ 0x38000000 /* sturb [reg,imm] */, +/* s l */ 0x38800000 /* ldursb [reg,imm] */, +/* s s */ 0x38000000 /* sturb [reg,imm] */, +}; + +static SLJIT_CONST sljit_ins sljit_mem_pre_simm[4] = { +/* u l */ 0x38400c00 /* ldrb [reg,imm]! */, +/* u s */ 0x38000c00 /* strb [reg,imm]! */, +/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */, +/* s s */ 0x38000c00 /* strb [reg,imm]! */, +}; + +static SLJIT_CONST sljit_ins sljit_mem_reg[4] = { +/* u l */ 0x38606800 /* ldrb [reg,reg] */, +/* u s */ 0x38206800 /* strb [reg,reg] */, +/* s l */ 0x38a06800 /* ldrsb [reg,reg] */, +/* s s */ 0x38206800 /* strb [reg,reg] */, +}; + +/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ +static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +{ + if (value >= 0) { + if (value <= 0xfff) + return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10)); + if (value <= 0xffffff && !(value & 0xfff)) + return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); + } + else { + value = -value; + if (value <= 0xfff) + return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10)); + if (value <= 0xffffff && !(value & 0xfff)) + return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2)); + } + return SLJIT_ERR_UNSUPPORTED; +} + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_ui shift = MEM_SIZE_SHIFT(flags); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(flags & UPDATE)) { + if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) { + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + arg &= REG_MASK; + argw &= 0x1ff; + FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3] + | (shift << 30) | RT(reg) | RN(arg) | (argw << 12))); + return -1; + } + return 0; + } + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + if (argw && argw != shift) + return 0; + + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0))); + return -1; + } + + arg &= REG_MASK; + if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) + | RT(reg) | RN(arg) | (argw << (10 - shift)))); + return -1; + } + + if (argw > 255 || argw < -256) + return 0; + + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) + | RT(reg) | RN(arg) | ((argw & 0x1ff) << 12))); + return -1; +} + +/* see getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_sw diff; + if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) + return 0; + + if (!(arg & REG_MASK)) { + diff = argw - next_argw; + if (diff <= 0xfff && diff >= -0xfff) + return 1; + return 0; + } + + if (argw == next_argw) + return 1; + + diff = argw - next_argw; + if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) + return 1; + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, + sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_ui shift = MEM_SIZE_SHIFT(flags); + sljit_si tmp_r, other_r; + sljit_sw diff; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + tmp_r = (flags & STORE) ? TMP_REG3 : reg; + + if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { + /* Update only applies if a base register exists. */ + other_r = OFFS_REG(arg); + if (!other_r) { + other_r = arg & REG_MASK; + if (other_r != reg && argw >= 0 && argw <= 0xffffff) { + if ((argw & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); + if (argw >> 12) + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); + } + else if (other_r != reg && argw < 0 && argw >= -0xffffff) { + argw = -argw; + if ((argw & 0xfff) != 0) + FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); + if (argw >> 12) + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); + } + + if (compiler->cache_arg == SLJIT_MEM) { + if (argw == compiler->cache_argw) { + other_r = TMP_REG3; + argw = 0; + } + else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + other_r = TMP_REG3; + argw = 0; + } + } + + if (argw) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + other_r = TMP_REG3; + argw = 0; + } + } + + /* No caching here. */ + arg &= REG_MASK; + argw &= 0x3; + if (!argw || argw == shift) { + FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0))); + return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)); + } + if (arg != reg) { + FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); + } + FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10))); + FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR))); + return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR)); + } + + if (arg & OFFS_REG_MASK) { + other_r = OFFS_REG(arg); + arg &= REG_MASK; + FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r)); + } + + if (compiler->cache_arg == arg) { + diff = argw - compiler->cache_argw; + if (diff <= 255 && diff >= -256) + return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30) + | RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); + } + } + + if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) + | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); + } + + diff = argw - next_argw; + next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0; + arg &= REG_MASK; + + if (arg && compiler->cache_arg == SLJIT_MEM) { + if (compiler->cache_argw == argw) + return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); + } + } + + compiler->cache_argw = argw; + if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + + if (next_arg) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg))); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + } + + if (arg) + return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3)); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si i, tmp, offs, prev, saved_regs_size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0); + local_size += saved_regs_size + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; + + if (local_size <= (63 * sizeof(sljit_sw))) { + FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) + | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); + offs = (local_size - saved_regs_size) << (15 - 3); + } else { + compiler->local_size += 2 * sizeof(sljit_sw); + local_size -= saved_regs_size; + saved_regs_size += 2 * sizeof(sljit_sw); + FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) + | RN(TMP_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15))); + offs = 2 << 15; + } + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + if (prev != -1) + FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); + + if (compiler->local_size > (63 * sizeof(sljit_sw))) { + /* The local_size is already adjusted by the saved registers. */ + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + if (local_size) + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); + FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10))); + } + + if (args >= 1) + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + if (local_size > (63 * sizeof(sljit_sw))) + local_size += 2 * sizeof(sljit_sw); + compiler->local_size = local_size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si local_size; + sljit_si i, tmp, offs, prev, saved_regs_size; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + local_size = compiler->local_size; + + saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0); + if (local_size <= (63 * sizeof(sljit_sw))) + offs = (local_size - saved_regs_size) << (15 - 3); + else { + saved_regs_size += 2 * sizeof(sljit_sw); + local_size -= saved_regs_size; + if (local_size > 0xfff) { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22))); + local_size &= 0xfff; + } + if (local_size) + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10))); + local_size = saved_regs_size; + offs = 2 << 15; + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + prev = -1; + for (i = SLJIT_S0; i >= tmp; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + if (prev == -1) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); + offs += 2 << 15; + prev = -1; + } + + if (prev != -1) + FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); + + FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) + | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); + + FAIL_IF(push_inst(compiler, RET | RN(TMP_LR))); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + sljit_ins inv_bits = (op & SLJIT_INT_OP) ? (1 << 31) : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BRK); + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_LUDIV: + case SLJIT_LSDIV: + FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_LUDIV ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); + return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r, flags, mem_flags; + sljit_si op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_UB: + flags = BYTE_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_ub)srcw; + break; + case SLJIT_MOV_SB: + flags = BYTE_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_sb)srcw; + break; + case SLJIT_MOV_UH: + flags = HALF_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_uh)srcw; + break; + case SLJIT_MOV_SH: + flags = HALF_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_sh)srcw; + break; + case SLJIT_MOV_UI: + flags = INT_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_ui)srcw; + break; + case SLJIT_MOV_SI: + flags = INT_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_si)srcw; + break; + case SLJIT_MOVU: + case SLJIT_MOVU_P: + flags = WORD_SIZE | UPDATE; + break; + case SLJIT_MOVU_UB: + flags = BYTE_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_ub)srcw; + break; + case SLJIT_MOVU_SB: + flags = BYTE_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_sb)srcw; + break; + case SLJIT_MOVU_UH: + flags = HALF_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_uh)srcw; + break; + case SLJIT_MOVU_SH: + flags = HALF_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_sh)srcw; + break; + case SLJIT_MOVU_UI: + flags = INT_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_ui)srcw; + break; + case SLJIT_MOVU_SI: + flags = INT_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_si)srcw; + break; + default: + SLJIT_ASSERT_STOP(); + flags = 0; + break; + } + + if (src & SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); + } else { + if (dst_r != TMP_REG1) + return emit_op_imm(compiler, op | ((op_flags & SLJIT_INT_OP) ? INT_OP : 0), dst_r, TMP_REG1, src); + dst_r = src; + } + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; + } + + flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0; + mem_flags = WORD_SIZE; + if (op_flags & SLJIT_INT_OP) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (dst == SLJIT_UNUSED) + flags |= UNUSED_RETURN; + + if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw)); + src = TMP_REG2; + } + + if (src & SLJIT_IMM) { + flags |= ARG2_IMM; + if (op_flags & SLJIT_INT_OP) + srcw = (sljit_si)srcw; + } else + srcw = src; + + emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, flags, mem_flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + flags = GET_FLAGS(op) ? SET_FLAGS : 0; + mem_flags = WORD_SIZE; + if (op & SLJIT_INT_OP) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (dst == SLJIT_UNUSED) + flags |= UNUSED_RETURN; + + if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + } + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw)); + + if (src1 & SLJIT_MEM) + src1 = TMP_REG1; + if (src2 & SLJIT_MEM) + src2 = TMP_REG2; + + if (src1 & SLJIT_IMM) + flags |= ARG1_IMM; + else + src1w = src1; + if (src2 & SLJIT_IMM) + flags |= ARG2_IMM; + else + src2w = src2; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif +} + +static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_ui shift = MEM_SIZE_SHIFT(flags); + sljit_ins ins_bits = (shift << 30); + sljit_si other_r; + sljit_sw diff; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(flags & STORE)) + ins_bits |= 1 << 22; + + if (arg & OFFS_REG_MASK) { + argw &= 3; + if (!argw || argw == shift) + return push_inst(compiler, STR_FR | ins_bits | VT(reg) + | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)); + other_r = OFFS_REG(arg); + arg &= REG_MASK; + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10))); + arg = TMP_REG1; + argw = 0; + } + + arg &= REG_MASK; + if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0) + return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift))); + + if (arg && argw <= 255 && argw >= -256) + return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12)); + + /* Slow cases */ + if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) { + diff = argw - compiler->cache_argw; + if (!arg && diff <= 255 && diff >= -256) + return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + } + } + + if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + } + + if (arg & REG_MASK) + return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3)); + return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3)); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVI_FROMD) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + src = TMP_FREG1; + } + + FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + inv_bits |= (1 << 31); + + if (src & SLJIT_MEM) { + emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw); + src = TMP_REG1; + } else if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src))); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src))); + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE; + sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); + break; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_LR, srcw)); + + return push_inst(compiler, RET | RN(TMP_LR)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: + return 0x1; + + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: + return 0x0; + + case SLJIT_LESS: + case SLJIT_D_LESS: + return 0x2; + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + return 0x3; + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + return 0x9; + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + return 0x8; + + case SLJIT_SIG_LESS: + return 0xa; + + case SLJIT_SIG_GREATER_EQUAL: + return 0xb; + + case SLJIT_SIG_GREATER: + return 0xd; + + case SLJIT_SIG_LESS_EQUAL: + return 0xc; + + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: + return 0x7; + + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: + return 0x6; + + default: + SLJIT_ASSERT_STOP(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type))); + } + else if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_BL; + + PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1))); + + return jump; +} + +static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_si type, + sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump; + sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0; + + SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL); + ADJUST_LOCAL_OFFSET(src, srcw); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->flags |= IS_CBZ | IS_COND; + + if (src & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + else if (src & SLJIT_IMM) { + PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + SLJIT_ASSERT(FAST_IS_REG(src)); + + if ((type & 0xff) == SLJIT_EQUAL) + inv_bits |= 1 << 24; + + PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src))); + PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + /* In ARM, we don't need to touch the arguments. */ + if (!(src & SLJIT_IMM)) { + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw)); + src = TMP_REG1; + } + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src)); + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + + FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si dst_r, flags, mem_flags; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + cc = get_cc(type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (GET_OPCODE(op) < SLJIT_ADD) { + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO))); + if (dst_r != TMP_REG1) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw); + } + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + flags = GET_FLAGS(op) ? SET_FLAGS : 0; + mem_flags = WORD_SIZE; + if (op & SLJIT_INT_OP) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } else if (src & SLJIT_IMM) + flags |= ARG1_IMM; + + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2); + + if (dst_r != TMP_REG1) + return SLJIT_SUCCESS; + return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins* inst = (sljit_ins*)addr; + modify_imm64_const(inst, new_addr); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins* inst = (sljit_ins*)addr; + modify_imm64_const(inst, new_constant); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} diff --git a/src/sljit/sljitNativeARM_T2_32.c b/src/sljit/sljitNativeARM_T2_32.c new file mode 100644 index 0000000..6e38cec --- /dev/null +++ b/src/sljit/sljitNativeARM_T2_32.c @@ -0,0 +1,2058 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "ARM-Thumb2" SLJIT_CPUINFO; +} + +/* Length of an instruction word. */ +typedef sljit_ui sljit_ins; + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (0) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 +}; + +#define COPY_BITS(src, from, to, bits) \ + ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to)) + +/* Thumb16 encodings. */ +#define RD3(rd) (reg_map[rd]) +#define RN3(rn) (reg_map[rn] << 3) +#define RM3(rm) (reg_map[rm] << 6) +#define RDN3(rdn) (reg_map[rdn] << 8) +#define IMM3(imm) (imm << 6) +#define IMM8(imm) (imm) + +/* Thumb16 helpers. */ +#define SET_REGS44(rd, rn) \ + ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4)) +#define IS_2_LO_REGS(reg1, reg2) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7) +#define IS_3_LO_REGS(reg1, reg2, reg3) \ + (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7) + +/* Thumb32 encodings. */ +#define RD4(rd) (reg_map[rd] << 8) +#define RN4(rn) (reg_map[rn] << 16) +#define RM4(rm) (reg_map[rm]) +#define RT4(rt) (reg_map[rt] << 12) +#define DD4(dd) ((dd) << 12) +#define DN4(dn) ((dn) << 16) +#define DM4(dm) (dm) +#define IMM5(imm) \ + (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6)) +#define IMM12(imm) \ + (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +/* dot '.' changed to _ + I immediate form (possibly followed by number of immediate bits). */ +#define ADCI 0xf1400000 +#define ADCS 0x4140 +#define ADC_W 0xeb400000 +#define ADD 0x4400 +#define ADDS 0x1800 +#define ADDSI3 0x1c00 +#define ADDSI8 0x3000 +#define ADD_W 0xeb000000 +#define ADDWI 0xf2000000 +#define ADD_SP 0xb000 +#define ADD_W 0xeb000000 +#define ADD_WI 0xf1000000 +#define ANDI 0xf0000000 +#define ANDS 0x4000 +#define AND_W 0xea000000 +#define ASRS 0x4100 +#define ASRSI 0x1000 +#define ASR_W 0xfa40f000 +#define ASR_WI 0xea4f0020 +#define BICI 0xf0200000 +#define BKPT 0xbe00 +#define BLX 0x4780 +#define BX 0x4700 +#define CLZ 0xfab0f080 +#define CMPI 0x2800 +#define CMP_W 0xebb00f00 +#define EORI 0xf0800000 +#define EORS 0x4040 +#define EOR_W 0xea800000 +#define IT 0xbf00 +#define LSLS 0x4080 +#define LSLSI 0x0000 +#define LSL_W 0xfa00f000 +#define LSL_WI 0xea4f0000 +#define LSRS 0x40c0 +#define LSRSI 0x0800 +#define LSR_W 0xfa20f000 +#define LSR_WI 0xea4f0010 +#define MOV 0x4600 +#define MOVS 0x0000 +#define MOVSI 0x2000 +#define MOVT 0xf2c00000 +#define MOVW 0xf2400000 +#define MOV_W 0xea4f0000 +#define MOV_WI 0xf04f0000 +#define MUL 0xfb00f000 +#define MVNS 0x43c0 +#define MVN_W 0xea6f0000 +#define MVN_WI 0xf06f0000 +#define NOP 0xbf00 +#define ORNI 0xf0600000 +#define ORRI 0xf0400000 +#define ORRS 0x4300 +#define ORR_W 0xea400000 +#define POP 0xbc00 +#define POP_W 0xe8bd0000 +#define PUSH 0xb400 +#define PUSH_W 0xe92d0000 +#define RSB_WI 0xf1c00000 +#define RSBSI 0x4240 +#define SBCI 0xf1600000 +#define SBCS 0x4180 +#define SBC_W 0xeb600000 +#define SMULL 0xfb800000 +#define STR_SP 0x9000 +#define SUBS 0x1a00 +#define SUBSI3 0x1e00 +#define SUBSI8 0x3800 +#define SUB_W 0xeba00000 +#define SUBWI 0xf2a00000 +#define SUB_SP 0xb080 +#define SUB_WI 0xf1a00000 +#define SXTB 0xb240 +#define SXTB_W 0xfa4ff080 +#define SXTH 0xb200 +#define SXTH_W 0xfa0ff080 +#define TST 0x4200 +#define UMULL 0xfba00000 +#define UXTB 0xb2c0 +#define UXTB_W 0xfa5ff080 +#define UXTH 0xb280 +#define UXTH_W 0xfa1ff080 +#define VABS_F32 0xeeb00ac0 +#define VADD_F32 0xee300a00 +#define VCMP_F32 0xeeb40a40 +#define VCVT_F32_S32 0xeeb80ac0 +#define VCVT_F64_F32 0xeeb70ac0 +#define VCVT_S32_F32 0xeebd0ac0 +#define VDIV_F32 0xee800a00 +#define VMOV_F32 0xeeb00a40 +#define VMOV 0xee000a10 +#define VMRS 0xeef1fa10 +#define VMUL_F32 0xee200a00 +#define VNEG_F32 0xeeb10a40 +#define VSTR_F32 0xed000a00 +#define VSUB_F32 0xee300a40 + +static sljit_si push_inst16(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_uh *ptr; + SLJIT_ASSERT(!(inst & 0xffff0000)); + + ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh)); + FAIL_IF(!ptr); + *ptr = inst; + compiler->size++; + return SLJIT_SUCCESS; +} + +static sljit_si push_inst32(struct sljit_compiler *compiler, sljit_ins inst) +{ + sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr++ = inst >> 16; + *ptr = inst; + compiler->size += 2; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_imm32_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +{ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | + COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst) | + COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); +} + +static SLJIT_INLINE void modify_imm32_const(sljit_uh *inst, sljit_uw new_imm) +{ + sljit_si dst = inst[1] & 0x0f00; + SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00)); + inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1); + inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff); + inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1); + inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); +} + +static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code) +{ + sljit_sw diff; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; + + if (jump->flags & JUMP_ADDR) { + /* Branch to ARM code is not optimized yet. */ + if (!(jump->u.target & 0x1)) + return 0; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1; + } + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1; + } + + if (jump->flags & IS_COND) { + SLJIT_ASSERT(!(jump->flags & IS_BL)); + if (diff <= 127 && diff >= -128) { + jump->flags |= PATCH_TYPE1; + return 5; + } + if (diff <= 524287 && diff >= -524288) { + jump->flags |= PATCH_TYPE2; + return 4; + } + /* +1 comes from the prefix IT instruction. */ + diff--; + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE3; + return 3; + } + } + else if (jump->flags & IS_BL) { + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_BL; + return 3; + } + } + else { + if (diff <= 1023 && diff >= -1024) { + jump->flags |= PATCH_TYPE4; + return 4; + } + if (diff <= 8388607 && diff >= -8388608) { + jump->flags |= PATCH_TYPE5; + return 3; + } + } + + return 0; +} + +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) +{ + sljit_si type = (jump->flags >> 4) & 0xf; + sljit_sw diff; + sljit_uh *jump_inst; + sljit_si s, j1, j2; + + if (SLJIT_UNLIKELY(type == 0)) { + modify_imm32_const((sljit_uh*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target); + return; + } + + if (jump->flags & JUMP_ADDR) { + SLJIT_ASSERT(jump->u.target & 0x1); + diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1; + } + else + diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1; + jump_inst = (sljit_uh*)jump->addr; + + switch (type) { + case 1: + /* Encoding T1 of 'B' instruction */ + SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND)); + jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff); + return; + case 2: + /* Encoding T3 of 'B' instruction */ + SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND)); + jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1); + jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff); + return; + case 3: + SLJIT_ASSERT(jump->flags & IS_COND); + *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8; + diff--; + type = 5; + break; + case 4: + /* Encoding T2 of 'B' instruction */ + SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND)); + jump_inst[0] = 0xe000 | (diff & 0x7ff); + return; + } + + SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608); + + /* Really complex instruction form for branches. */ + s = (diff >> 23) & 0x1; + j1 = (~(diff >> 21) ^ s) & 0x1; + j2 = (~(diff >> 22) ^ s) & 0x1; + jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10); + jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff); + + /* The others have a common form. */ + if (type == 5) /* Encoding T4 of 'B' instruction */ + jump_inst[1] |= 0x9000; + else if (type == 6) /* Encoding T1 of 'BL' instruction */ + jump_inst[1] |= 0xd000; + else + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_uh *code; + sljit_uh *code_ptr; + sljit_uh *buf_ptr; + sljit_uh *buf_end; + sljit_uw half_count; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + half_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + + do { + buf_ptr = (sljit_uh*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 1); + do { + *code_ptr = *buf_ptr++; + /* These structures are ordered by their address. */ + SLJIT_ASSERT(!label || label->size >= half_count); + SLJIT_ASSERT(!jump || jump->addr >= half_count); + SLJIT_ASSERT(!const_ || const_->addr >= half_count); + if (label && label->size == half_count) { + label->addr = ((sljit_uw)code_ptr) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == half_count) { + jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); + code_ptr -= detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == half_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + half_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == half_count) { + label->addr = ((sljit_uw)code_ptr) | 0x1; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + set_jump_instruction(jump); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_uh); + SLJIT_CACHE_FLUSH(code, code_ptr); + /* Set thumb mode flag. */ + return (void*)((sljit_uw)code | 0x1); +} + +/* --------------------------------------------------------------------- */ +/* Core code generator functions. */ +/* --------------------------------------------------------------------- */ + +#define INVALID_IMM 0x80000000 +static sljit_uw get_imm(sljit_uw imm) +{ + /* Thumb immediate form. */ + sljit_si counter; + + if (imm <= 0xff) + return imm; + + if ((imm & 0xffff) == (imm >> 16)) { + /* Some special cases. */ + if (!(imm & 0xff00)) + return (1 << 12) | (imm & 0xff); + if (!(imm & 0xff)) + return (2 << 12) | ((imm >> 8) & 0xff); + if ((imm & 0xff00) == ((imm & 0xff) << 8)) + return (3 << 12) | (imm & 0xff); + } + + /* Assembly optimization: count leading zeroes? */ + counter = 8; + if (!(imm & 0xffff0000)) { + counter += 16; + imm <<= 16; + } + if (!(imm & 0xff000000)) { + counter += 8; + imm <<= 8; + } + if (!(imm & 0xf0000000)) { + counter += 4; + imm <<= 4; + } + if (!(imm & 0xc0000000)) { + counter += 2; + imm <<= 2; + } + if (!(imm & 0x80000000)) { + counter += 1; + imm <<= 1; + } + /* Since imm >= 128, this must be true. */ + SLJIT_ASSERT(counter <= 31); + + if (imm & 0x00ffffff) + return INVALID_IMM; /* Cannot be encoded. */ + + return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1); +} + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm) +{ + sljit_uw tmp; + + if (imm >= 0x10000) { + tmp = get_imm(imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MOV_WI | RD4(dst) | tmp); + tmp = get_imm(~imm); + if (tmp != INVALID_IMM) + return push_inst32(compiler, MVN_WI | RD4(dst) | tmp); + } + + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) | + COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))); + + /* set hi 16 bit if needed. */ + if (imm >= 0x10000) + return push_inst32(compiler, MOVT | RD4(dst) | + COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16)); + return SLJIT_SUCCESS; +} + +#define ARG1_IMM 0x0010000 +#define ARG2_IMM 0x0020000 +#define KEEP_FLAGS 0x0040000 +/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ +#define SET_FLAGS 0x0100000 +#define UNUSED_RETURN 0x0200000 +#define SLOW_DEST 0x0400000 +#define SLOW_SRC1 0x0800000 +#define SLOW_SRC2 0x1000000 + +static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_uw arg1, sljit_uw arg2) +{ + /* dst must be register, TMP_REG1 + arg1 must be register, TMP_REG1, imm + arg2 must be register, TMP_REG2, imm */ + sljit_si reg; + sljit_uw imm, nimm; + + if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { + /* Both are immediates. */ + flags &= ~ARG1_IMM; + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + + if (flags & (ARG1_IMM | ARG2_IMM)) { + reg = (flags & ARG2_IMM) ? arg1 : arg2; + imm = (flags & ARG2_IMM) ? arg2 : arg1; + + switch (flags & 0xffff) { + case SLJIT_CLZ: + case SLJIT_MUL: + /* No form with immediate operand. */ + break; + case SLJIT_MOV: + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + return load_immediate(compiler, dst, imm); + case SLJIT_NOT: + if (!(flags & SET_FLAGS)) + return load_immediate(compiler, dst, ~imm); + /* Since the flags should be set, we just fallback to the register mode. + Although some clever things could be done here, "NOT IMM" does not worth the efforts. */ + break; + case SLJIT_ADD: + nimm = -imm; + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (nimm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst)); + if (nimm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst)); + } + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (nimm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm)); + } + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_ADDC: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUB: + if (flags & ARG1_IMM) { + if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst)) + return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + } + nimm = -imm; + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (imm <= 0x7) + return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); + if (nimm <= 0x7) + return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg)); + if (reg == dst) { + if (imm <= 0xff) + return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst)); + if (nimm <= 0xff) + return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst)); + } + if (imm <= 0xff && (flags & UNUSED_RETURN)) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + } + if (!(flags & SET_FLAGS)) { + if (imm <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm)); + if (nimm <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm)); + } + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SUBC: + if (flags & ARG1_IMM) + break; + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_AND: + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_OR: + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_XOR: + imm = get_imm(imm); + if (imm != INVALID_IMM) + return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + break; + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (flags & ARG1_IMM) + break; + imm &= 0x1f; + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } + switch (flags & 0xffff) { + case SLJIT_SHL: + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + case SLJIT_LSHR: + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + default: /* SLJIT_ASHR */ + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); + return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); + } + default: + SLJIT_ASSERT_STOP(); + break; + } + + if (flags & ARG2_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); + arg2 = TMP_REG2; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); + arg1 = TMP_REG1; + } + } + + /* Both arguments are registers. */ + switch (flags & 0xffff) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + case SLJIT_MOVU_P: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (dst == arg2) + return SLJIT_SUCCESS; + return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); + case SLJIT_MOV_UB: + case SLJIT_MOVU_UB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_SB: + case SLJIT_MOVU_SB: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_UH: + case SLJIT_MOVU_UH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_MOV_SH: + case SLJIT_MOVU_SH: + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + if (IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); + case SLJIT_NOT: + SLJIT_ASSERT(arg1 == TMP_REG1); + if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); + case SLJIT_CLZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); + if (flags & SET_FLAGS) { + if (reg_map[dst] <= 7) + return push_inst16(compiler, CMPI | RDN3(dst)); + return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst)); + } + return SLJIT_SUCCESS; + case SLJIT_ADD: + if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); + if (dst == arg1 && !(flags & SET_FLAGS)) + return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); + return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ADDC: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUB: + if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); + return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SUBC: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_MUL: + if (!(flags & SET_FLAGS)) + return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); + SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2); + FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); + /* cmp TMP_REG2, dst asr #31. */ + return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); + case SLJIT_AND: + if (!(flags & KEEP_FLAGS)) { + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); + } + return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_OR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_XOR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_SHL: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_LSHR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + case SLJIT_ASHR: + if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); + return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +#define STORE 0x01 +#define SIGNED 0x02 + +#define WORD_SIZE 0x00 +#define BYTE_SIZE 0x04 +#define HALF_SIZE 0x08 + +#define UPDATE 0x10 +#define ARG_TEST 0x20 + +#define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) +#define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) + +/* + 1st letter: + w = word + b = byte + h = half + + 2nd letter: + s = signed + u = unsigned + + 3rd letter: + l = load + s = store +*/ + +static SLJIT_CONST sljit_ins sljit_mem16[12] = { +/* w u l */ 0x5800 /* ldr */, +/* w u s */ 0x5000 /* str */, +/* w s l */ 0x5800 /* ldr */, +/* w s s */ 0x5000 /* str */, + +/* b u l */ 0x5c00 /* ldrb */, +/* b u s */ 0x5400 /* strb */, +/* b s l */ 0x5600 /* ldrsb */, +/* b s s */ 0x5400 /* strb */, + +/* h u l */ 0x5a00 /* ldrh */, +/* h u s */ 0x5200 /* strh */, +/* h s l */ 0x5e00 /* ldrsh */, +/* h s s */ 0x5200 /* strh */, +}; + +static SLJIT_CONST sljit_ins sljit_mem16_imm5[12] = { +/* w u l */ 0x6800 /* ldr imm5 */, +/* w u s */ 0x6000 /* str imm5 */, +/* w s l */ 0x6800 /* ldr imm5 */, +/* w s s */ 0x6000 /* str imm5 */, + +/* b u l */ 0x7800 /* ldrb imm5 */, +/* b u s */ 0x7000 /* strb imm5 */, +/* b s l */ 0x0000 /* not allowed */, +/* b s s */ 0x7000 /* strb imm5 */, + +/* h u l */ 0x8800 /* ldrh imm5 */, +/* h u s */ 0x8000 /* strh imm5 */, +/* h s l */ 0x0000 /* not allowed */, +/* h s s */ 0x8000 /* strh imm5 */, +}; + +#define MEM_IMM8 0xc00 +#define MEM_IMM12 0x800000 +static SLJIT_CONST sljit_ins sljit_mem32[12] = { +/* w u l */ 0xf8500000 /* ldr.w */, +/* w u s */ 0xf8400000 /* str.w */, +/* w s l */ 0xf8500000 /* ldr.w */, +/* w s s */ 0xf8400000 /* str.w */, + +/* b u l */ 0xf8100000 /* ldrb.w */, +/* b u s */ 0xf8000000 /* strb.w */, +/* b s l */ 0xf9100000 /* ldrsb.w */, +/* b s s */ 0xf8000000 /* strb.w */, + +/* h u l */ 0xf8300000 /* ldrh.w */, +/* h u s */ 0xf8200000 /* strsh.w */, +/* h s l */ 0xf9300000 /* ldrsh.w */, +/* h s s */ 0xf8200000 /* strsh.w */, +}; + +/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ +static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value) +{ + if (value >= 0) { + if (value <= 0xfff) + return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value)); + value = get_imm(value); + if (value != INVALID_IMM) + return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value); + } + else { + value = -value; + if (value <= 0xfff) + return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value)); + value = get_imm(value); + if (value != INVALID_IMM) + return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value); + } + return SLJIT_ERR_UNSUPPORTED; +} + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_si other_r, shift; + + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (SLJIT_UNLIKELY(flags & UPDATE)) { + if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) { + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + flags &= ~UPDATE; + arg &= 0xf; + if (argw >= 0) + argw |= 0x200; + else { + argw = -argw; + } + + SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff); + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw)); + return -1; + } + return 0; + } + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + argw &= 0x3; + other_r = OFFS_REG(arg); + arg &= 0xf; + + if (!argw && IS_3_LO_REGS(reg, arg, other_r)) + FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r))); + else + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4))); + return -1; + } + + if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff) + return 0; + + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + arg &= 0xf; + if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { + shift = 3; + if (IS_WORD_SIZE(flags)) { + if (OFFSET_CHECK(0x1f, 2)) + shift = 2; + } + else if (flags & BYTE_SIZE) + { + if (OFFSET_CHECK(0x1f, 0)) + shift = 0; + } + else { + SLJIT_ASSERT(flags & HALF_SIZE); + if (OFFSET_CHECK(0x1f, 1)) + shift = 1; + } + + if (shift != 3) { + FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift)))); + return -1; + } + } + + /* SP based immediate. */ + if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { + FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); + return -1; + } + + if (argw >= 0) + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); + else + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw)); + return -1; +} + +/* see getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_sw diff; + if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) + return 0; + + if (!(arg & REG_MASK)) { + diff = argw - next_argw; + if (diff <= 0xfff && diff >= -0xfff) + return 1; + return 0; + } + + if (argw == next_argw) + return 1; + + diff = argw - next_argw; + if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) + return 1; + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, + sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si tmp_r, other_r; + sljit_sw diff; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + tmp_r = (flags & STORE) ? TMP_REG3 : reg; + + if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { + /* Update only applies if a base register exists. */ + /* There is no caching here. */ + other_r = OFFS_REG(arg); + arg &= 0xf; + flags &= ~UPDATE; + + if (!other_r) { + if (!(argw & ~0xfff)) { + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); + return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw)); + } + + if (compiler->cache_arg == SLJIT_MEM) { + if (argw == compiler->cache_argw) { + other_r = TMP_REG3; + argw = 0; + } + else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + other_r = TMP_REG3; + argw = 0; + } + } + + if (argw) { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + other_r = TMP_REG3; + argw = 0; + } + } + + argw &= 0x3; + if (!argw && IS_3_LO_REGS(reg, arg, other_r)) { + FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r))); + return push_inst16(compiler, ADD | SET_REGS44(arg, other_r)); + } + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4))); + return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6)); + } + flags &= ~UPDATE; + + SLJIT_ASSERT(!(arg & OFFS_REG_MASK)); + + if (compiler->cache_arg == arg) { + diff = argw - compiler->cache_argw; + if (!(diff & ~0xfff)) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff); + if (!((compiler->cache_argw - argw) & ~0xff)) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); + } + } + + next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw); + arg &= 0xf; + if (arg && compiler->cache_arg == SLJIT_MEM) { + if (compiler->cache_argw == argw) + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); + } + } + + compiler->cache_argw = argw; + if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + compiler->cache_arg = SLJIT_MEM; + + diff = argw - next_argw; + if (next_arg && diff <= 0xfff && diff >= -0xfff) { + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg))); + compiler->cache_arg = SLJIT_MEM | arg; + arg = 0; + } + } + + if (arg) + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si size, i, tmp; + sljit_ins push; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + push = (1 << 4); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + push |= 1 << reg_map[i]; + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + push |= 1 << reg_map[i]; + + FAIL_IF((push & 0xff00) + ? push_inst32(compiler, PUSH_W | (1 << 14) | push) + : push_inst16(compiler, PUSH | (1 << 8) | push)); + + /* Stack must be aligned to 8 bytes: (LR, R4) */ + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + local_size = ((size + local_size + 7) & ~7) - size; + compiler->local_size = local_size; + if (local_size > 0) { + if (local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size)); + } + + if (args >= 1) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0))); + if (args >= 2) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1))); + if (args >= 3) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + compiler->local_size = ((size + local_size + 7) & ~7) - size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si i, tmp; + sljit_ins pop; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + if (compiler->local_size > 0) { + if (compiler->local_size <= (127 << 2)) + FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2))); + else + FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); + } + + pop = (1 << 4); + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) + pop |= 1 << reg_map[i]; + + for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) + pop |= 1 << reg_map[i]; + + return (pop & 0xff00) + ? push_inst32(compiler, POP_W | (1 << 15) | pop) + : push_inst16(compiler, POP | (1 << 8) | pop); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator); +extern int __aeabi_idivmod(int numerator, int denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst16(compiler, BKPT); + case SLJIT_NOP: + return push_inst16(compiler, NOP); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + return push_inst32(compiler, (op == SLJIT_LUMUL ? UMULL : SMULL) + | (reg_map[SLJIT_R1] << 8) + | (reg_map[SLJIT_R0] << 12) + | (reg_map[SLJIT_R0] << 16) + | reg_map[SLJIT_R1]); + case SLJIT_LUDIV: + case SLJIT_LSDIV: + if (compiler->scratches >= 4) { + FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); + FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); + } else if (compiler->scratches >= 3) + FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */)); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + if (compiler->scratches >= 4) { + FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); + return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); + } else if (compiler->scratches >= 3) + return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */); + return SLJIT_SUCCESS; + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r, flags; + sljit_si op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + flags = WORD_SIZE; + break; + case SLJIT_MOV_UB: + flags = BYTE_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_ub)srcw; + break; + case SLJIT_MOV_SB: + flags = BYTE_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_sb)srcw; + break; + case SLJIT_MOV_UH: + flags = HALF_SIZE; + if (src & SLJIT_IMM) + srcw = (sljit_uh)srcw; + break; + case SLJIT_MOV_SH: + flags = HALF_SIZE | SIGNED; + if (src & SLJIT_IMM) + srcw = (sljit_sh)srcw; + break; + case SLJIT_MOVU: + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: + case SLJIT_MOVU_P: + flags = WORD_SIZE | UPDATE; + break; + case SLJIT_MOVU_UB: + flags = BYTE_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_ub)srcw; + break; + case SLJIT_MOVU_SB: + flags = BYTE_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_sb)srcw; + break; + case SLJIT_MOVU_UH: + flags = HALF_SIZE | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_uh)srcw; + break; + case SLJIT_MOVU_SH: + flags = HALF_SIZE | SIGNED | UPDATE; + if (src & SLJIT_IMM) + srcw = (sljit_sh)srcw; + break; + default: + SLJIT_ASSERT_STOP(); + flags = 0; + break; + } + + if (src & SLJIT_IMM) + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); + } else { + if (dst_r != TMP_REG1) + return emit_op_imm(compiler, op, dst_r, TMP_REG1, src); + dst_r = src; + } + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; + } + + if (op == SLJIT_NEG) { +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); + } + + flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0); + if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw)) + FAIL_IF(compiler->error); + else + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); + src = TMP_REG2; + } + + if (src & SLJIT_IMM) + flags |= ARG2_IMM; + else + srcw = src; + + emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); + + if (dst & SLJIT_MEM) { + if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) + return compiler->error; + else + return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, flags; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0); + + if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + } + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); + + if (src1 & SLJIT_MEM) + src1 = TMP_REG1; + if (src2 & SLJIT_MEM) + src2 = TMP_REG2; + + if (src1 & SLJIT_IMM) + flags |= ARG1_IMM; + else + src1w = src1; + if (src2 & SLJIT_IMM) + flags |= ARG2_IMM; + else + src2w = src2; + + if (dst == SLJIT_UNUSED) + flags |= UNUSED_RETURN; + + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + if (size == 2) + return push_inst16(compiler, *(sljit_uh*)instruction); + return push_inst32(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif +} + +#define FPU_LOAD (1 << 20) + +static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_sw tmp; + sljit_uw imm; + sljit_sw inst = VSTR_F32 | (flags & (SLJIT_SINGLE_OP | FPU_LOAD)); + + SLJIT_ASSERT(arg & SLJIT_MEM); + + /* Fast loads and stores. */ + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6))); + arg = SLJIT_MEM | TMP_REG2; + argw = 0; + } + + if ((arg & REG_MASK) && (argw & 0x3) == 0) { + if (!(argw & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2)); + if (!(-argw & ~0x3fc)) + return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2)); + } + + /* Slow cases */ + SLJIT_ASSERT(!(arg & OFFS_REG_MASK)); + if (compiler->cache_arg == arg) { + tmp = argw - compiler->cache_argw; + if (!(tmp & ~0x3fc)) + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2)); + if (!(-tmp & ~0x3fc)) + return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2)); + if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + compiler->cache_argw = argw; + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); + } + } + + if (arg & REG_MASK) { + if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { + FAIL_IF(compiler->error); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); + } + imm = get_imm(argw & ~0x3fc); + if (imm != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + } + imm = get_imm(-argw & ~0x3fc); + if (imm != INVALID_IMM) { + argw = -argw; + FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm)); + return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2)); + } + } + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + if (arg & REG_MASK) + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK)))); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); + src = TMP_FREG1; + } + + FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_SINGLE_OP) | DD4(TMP_FREG1) | DM4(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); + + /* Store the integer value from a VFP register. */ + return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1))); + } + + FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_SINGLE_OP) | DD4(src1) | DM4(src2))); + return push_inst32(compiler, VMRS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (GET_OPCODE(op) != SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, dst_r, src, srcw); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DM4(src))); + op ^= SLJIT_SINGLE_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), dst_r, dst, dstw); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (src1 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); + src1 = TMP_FREG1; + } + if (src2 & SLJIT_MEM) { + emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) | FPU_LOAD, TMP_FREG2, src2, src2w); + src2 = TMP_FREG2; + } + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_DSUB: + FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_DMUL: + FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + case SLJIT_DDIV: + FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_SINGLE_OP) | DD4(dst_r) | DN4(src1) | DM4(src2))); + break; + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP), TMP_FREG1, dst, dstw); +} + +#undef FPU_LOAD + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); + + /* Memory. */ + if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw)) + return compiler->error; + /* TMP_REG3 is used for caching. */ + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3))); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); + else if (src & SLJIT_MEM) { + if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) + FAIL_IF(compiler->error); + else { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0)); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2))); + } + } + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); + return push_inst16(compiler, BLX | RN3(TMP_REG3)); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +static sljit_uw get_cc(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_EQUAL: + return 0x0; + + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_NOT_EQUAL: + return 0x1; + + case SLJIT_LESS: + case SLJIT_D_LESS: + return 0x3; + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + return 0x2; + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + return 0x8; + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + return 0x9; + + case SLJIT_SIG_LESS: + return 0xb; + + case SLJIT_SIG_GREATER_EQUAL: + return 0xa; + + case SLJIT_SIG_GREATER: + return 0xc; + + case SLJIT_SIG_LESS_EQUAL: + return 0xd; + + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: + return 0x6; + + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: + return 0x7; + + default: /* SLJIT_JUMP */ + SLJIT_ASSERT_STOP(); + return 0xe; + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + sljit_ins cc; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In ARM, we don't need to touch the arguments. */ + PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + cc = get_cc(type); + jump->flags |= cc << 8; + PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + } + + jump->addr = compiler->size; + if (type <= SLJIT_JUMP) + PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1))); + else { + jump->flags |= IS_BL; + PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1))); + } + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + /* In ARM, we don't need to touch the arguments. */ + if (!(src & SLJIT_IMM)) { + if (FAST_IS_REG(src)) + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); + + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw)); + if (type >= SLJIT_FAST_CALL) + return push_inst16(compiler, BLX | RN3(TMP_REG1)); + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0)); + jump->u.target = srcw; + + FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0)); + jump->addr = compiler->size; + return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si dst_r, flags = GET_ALL_FLAGS(op); + sljit_ins cc, ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); + cc = get_cc(type & 0xff); + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + + if (op < SLJIT_ADD) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + if (reg_map[dst_r] > 7) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); + } else { + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); + FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); + } + if (dst_r != TMP_REG2) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw); + } + + ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI)); + if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { + /* Does not change the other bits. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1)); + if (flags & SLJIT_SET_E) { + /* The condition must always be set, even if the ORRI/EORI is not executed above. */ + if (reg_map[dst] <= 7) + return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst)); + } + return SLJIT_SUCCESS; + } + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); + src = TMP_REG2; + srcw = 0; + } else if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + src = TMP_REG2; + srcw = 0; + } + + if (op == SLJIT_AND || src != dst_r) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0)); + } + else { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); + } + + if (dst_r == TMP_REG2) + FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0)); + + if (flags & SLJIT_SET_E) { + /* The condition must always be set, even if the ORR/EORI is not executed above. */ + if (reg_map[dst_r] <= 7) + return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si dst_r; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_uh *inst = (sljit_uh*)addr; + modify_imm32_const(inst, new_addr); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_uh *inst = (sljit_uh*)addr; + modify_imm32_const(inst, new_constant); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} diff --git a/src/sljit/sljitNativeMIPS_32.c b/src/sljit/sljitNativeMIPS_32.c new file mode 100644 index 0000000..b2b60d7 --- /dev/null +++ b/src/sljit/sljitNativeMIPS_32.c @@ -0,0 +1,366 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 32-bit arch dependent functions. */ + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +{ + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; +} + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#define EMIT_SHIFT(op_imm, op_v) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_sw src2) +{ + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) { +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst)); +#endif + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) { +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst)); +#endif + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); +#else + if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { + FAIL_IF(push_inst(compiler, SRL | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); + return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); + } + /* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, ADDU | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM(32), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); + if (op & SLJIT_SET_E) + return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); +#endif + return SLJIT_SUCCESS; + + case SLJIT_ADD: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + else { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + } + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* a + b >= a | b (otherwise, the carry should be set to 1). */ + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + if (!(op & SLJIT_SET_O)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + + case SLJIT_ADDC: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + else { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + } + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); + } + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); + if (!(op & SLJIT_SET_C)) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + /* Set carry flag. */ + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + if (op & SLJIT_SET_U) + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); + if (op & SLJIT_SET_S) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!(op & SLJIT_SET_O)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); + + FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + if (!(op & SLJIT_SET_O)) { +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif + } + FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(SLL, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(SRL, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(SRA, SRAV); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/src/sljit/sljitNativeMIPS_64.c b/src/sljit/sljitNativeMIPS_64.c new file mode 100644 index 0000000..185fb57 --- /dev/null +++ b/src/sljit/sljitNativeMIPS_64.c @@ -0,0 +1,469 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* mips 64-bit arch dependent functions. */ + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +{ + sljit_si shift = 32; + sljit_si shift2; + sljit_si inv = 0; + sljit_ins ins; + sljit_uw uimm; + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm < 0 && imm >= SIMM_MIN) + return push_inst(compiler, ADDIU | SA(0) | TA(dst_ar) | IMM(imm), dst_ar); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(imm >> 16), dst_ar)); + return (imm & 0xffff) ? push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar) : SLJIT_SUCCESS; + } + + /* Zero extended number. */ + uimm = imm; + if (imm < 0) { + uimm = ~imm; + inv = 1; + } + + while (!(uimm & 0xff00000000000000l)) { + shift -= 8; + uimm <<= 8; + } + + if (!(uimm & 0xf000000000000000l)) { + shift -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift -= 2; + uimm <<= 2; + } + + if ((sljit_sw)uimm < 0) { + uimm >>= 1; + shift += 1; + } + SLJIT_ASSERT(((uimm & 0xc000000000000000l) == 0x4000000000000000l) && (shift > 0) && (shift <= 32)); + + if (inv) + uimm = ~uimm; + + FAIL_IF(push_inst(compiler, LUI | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + if (uimm & 0x0000ffff00000000l) + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 32), dst_ar)); + + imm &= (1l << shift) - 1; + if (!(imm & ~0xffff)) { + ins = (shift == 32) ? DSLL32 : DSLL; + if (shift < 32) + ins |= SH_IMM(shift); + FAIL_IF(push_inst(compiler, ins | TA(dst_ar) | DA(dst_ar), dst_ar)); + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); + } + + /* Double shifts needs to be performed. */ + uimm <<= 32; + shift2 = shift - 16; + + while (!(uimm & 0xf000000000000000l)) { + shift2 -= 4; + uimm <<= 4; + } + + if (!(uimm & 0xc000000000000000l)) { + shift2 -= 2; + uimm <<= 2; + } + + if (!(uimm & 0x8000000000000000l)) { + shift2--; + uimm <<= 1; + } + + SLJIT_ASSERT((uimm & 0x8000000000000000l) && (shift2 > 0) && (shift2 <= 16)); + + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift - shift2), dst_ar)); + FAIL_IF(push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(uimm >> 48), dst_ar)); + FAIL_IF(push_inst(compiler, DSLL | TA(dst_ar) | DA(dst_ar) | SH_IMM(shift2), dst_ar)); + + imm &= (1l << shift2) - 1; + return !(imm & 0xffff) ? SLJIT_SUCCESS : push_inst(compiler, ORI | SA(dst_ar) | TA(dst_ar) | IMM(imm), dst_ar); +} + +#define SELECT_OP(a, b) \ + (!(op & SLJIT_INT_OP) ? a : b) + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ + } \ + else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ + } + +#define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \ + if (flags & SRC2_IMM) { \ + if (src2 >= 32) { \ + SLJIT_ASSERT(!(op & SLJIT_INT_OP)); \ + ins = op_dimm32; \ + src2 -= 32; \ + } \ + else \ + ins = (op & SLJIT_INT_OP) ? op_imm : op_dimm; \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ + } \ + else { \ + ins = (op & SLJIT_INT_OP) ? op_v : op_dv; \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ + } + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_sw src2) +{ + sljit_ins ins; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst)); + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) { + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst)); + } + return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UI: + SLJIT_ASSERT(!(op & SLJIT_INT_OP)); + FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst))); + return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst)); + + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst)); + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); +#else + if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { + FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); + return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); + } + /* Nearly all instructions are unmovable in the following sequence. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_INT_OP) ? 32 : 64), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); + FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); + if (op & SLJIT_SET_E) + return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); +#endif + return SLJIT_SUCCESS; + + case SLJIT_ADD: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + } + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } + else { + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + /* a + b >= a | b (otherwise, the carry should be set to 1). */ + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + if (!(op & SLJIT_SET_O)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + + case SLJIT_ADDC: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + else { + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + } + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); + } else { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); + if (!(op & SLJIT_SET_C)) + return SLJIT_SUCCESS; + + /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + /* Set carry flag. */ + return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + if (src2 >= 0) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + else + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + } + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (op & (SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (op & SLJIT_SET_E) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + if (op & SLJIT_SET_U) + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); + if (op & SLJIT_SET_S) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); + } + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (!(op & SLJIT_SET_O)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); + } + else { + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + /* dst may be the same as src1 or src2. */ + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); + } + + if (op & SLJIT_SET_C) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); + + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); + return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + + case SLJIT_MUL: + SLJIT_ASSERT(!(flags & SRC2_IMM)); + if (!(op & SLJIT_SET_O)) { +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + if (op & SLJIT_INT_OP) + return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); + FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#else + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + return push_inst(compiler, MFLO | D(dst), DR(dst)); +#endif + } + FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + + case SLJIT_AND: + EMIT_LOGICAL(ANDI, AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(ORI, OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(XORI, XOR); + return SLJIT_SUCCESS; + + case SLJIT_SHL: + EMIT_SHIFT(DSLL, DSLL32, SLL, DSLLV, SLLV); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(DSRL, DSRL32, SRL, DSRLV, SRLV); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(DSRA, DSRA32, SRA, DSRAV, SRAV); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, LUI | T(dst) | IMM(init_value >> 48), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 32), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + FAIL_IF(push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value >> 16), DR(dst))); + FAIL_IF(push_inst(compiler, DSLL | T(dst) | D(dst) | SH_IMM(16), DR(dst))); + return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 6); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 6); +} diff --git a/src/sljit/sljitNativeMIPS_common.c b/src/sljit/sljitNativeMIPS_common.c new file mode 100644 index 0000000..3e2c9f0 --- /dev/null +++ b/src/sljit/sljitNativeMIPS_common.c @@ -0,0 +1,2135 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Latest MIPS architecture. */ +/* Automatically detect SLJIT_MIPS_R1 */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return "MIPS32-R1" SLJIT_CPUINFO; +#else + return "MIPS64-R1" SLJIT_CPUINFO; +#endif +#else /* SLJIT_MIPS_R1 */ + return "MIPS III" SLJIT_CPUINFO; +#endif +} + +/* Length of an instruction word + Both for mips-32 and mips-64 */ +typedef sljit_ui sljit_ins; + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) + +/* For position independent code, t9 must contain the function address. */ +#define PIC_ADDR_REG TMP_REG2 + +/* Floating point status register. */ +#define FCSR_REG 31 +/* Return address register. */ +#define RETURN_ADDR_REG 31 + +/* Flags are kept in volatile registers. */ +#define EQUAL_FLAG 12 +/* And carry flag as well. */ +#define ULESS_FLAG 13 +#define UGREATER_FLAG 14 +#define LESS_FLAG 15 +#define GREATER_FLAG 31 +#define OVERFLOW_FLAG 1 + +#define TMP_FREG1 (0) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define S(s) (reg_map[s] << 21) +#define T(t) (reg_map[t] << 16) +#define D(d) (reg_map[d] << 11) +/* Absolute registers. */ +#define SA(s) ((s) << 21) +#define TA(t) ((t) << 16) +#define DA(d) ((d) << 11) +#define FT(t) ((t) << 16) +#define FS(s) ((s) << 11) +#define FD(d) ((d) << 6) +#define IMM(imm) ((imm) & 0xffff) +#define SH_IMM(imm) ((imm) << 6) + +#define DR(dr) (reg_map[dr]) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) (opcode) +/* S = (16 << 21) D = (17 << 21) */ +#define FMT_S (16 << 21) + +#define ABS_S (HI(17) | FMT_S | LO(5)) +#define ADD_S (HI(17) | FMT_S | LO(0)) +#define ADDIU (HI(9)) +#define ADDU (HI(0) | LO(33)) +#define AND (HI(0) | LO(36)) +#define ANDI (HI(12)) +#define B (HI(4)) +#define BAL (HI(1) | (17 << 16)) +#define BC1F (HI(17) | (8 << 21)) +#define BC1T (HI(17) | (8 << 21) | (1 << 16)) +#define BEQ (HI(4)) +#define BGEZ (HI(1) | (1 << 16)) +#define BGTZ (HI(7)) +#define BLEZ (HI(6)) +#define BLTZ (HI(1) | (0 << 16)) +#define BNE (HI(5)) +#define BREAK (HI(0) | LO(13)) +#define CFC1 (HI(17) | (2 << 21)) +#define C_UN_S (HI(17) | FMT_S | LO(49)) +#define C_UEQ_S (HI(17) | FMT_S | LO(51)) +#define C_ULE_S (HI(17) | FMT_S | LO(55)) +#define C_ULT_S (HI(17) | FMT_S | LO(53)) +#define CVT_S_S (HI(17) | FMT_S | LO(32)) +#define DADDIU (HI(25)) +#define DADDU (HI(0) | LO(45)) +#define DDIV (HI(0) | LO(30)) +#define DDIVU (HI(0) | LO(31)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) +#define DIV_S (HI(17) | FMT_S | LO(3)) +#define DMULT (HI(0) | LO(28)) +#define DMULTU (HI(0) | LO(29)) +#define DSLL (HI(0) | LO(56)) +#define DSLL32 (HI(0) | LO(60)) +#define DSLLV (HI(0) | LO(20)) +#define DSRA (HI(0) | LO(59)) +#define DSRA32 (HI(0) | LO(63)) +#define DSRAV (HI(0) | LO(23)) +#define DSRL (HI(0) | LO(58)) +#define DSRL32 (HI(0) | LO(62)) +#define DSRLV (HI(0) | LO(22)) +#define DSUBU (HI(0) | LO(47)) +#define J (HI(2)) +#define JAL (HI(3)) +#define JALR (HI(0) | LO(9)) +#define JR (HI(0) | LO(8)) +#define LD (HI(55)) +#define LUI (HI(15)) +#define LW (HI(35)) +#define MFC1 (HI(17)) +#define MFHI (HI(0) | LO(16)) +#define MFLO (HI(0) | LO(18)) +#define MOV_S (HI(17) | FMT_S | LO(6)) +#define MTC1 (HI(17) | (4 << 21)) +#define MUL_S (HI(17) | FMT_S | LO(2)) +#define MULT (HI(0) | LO(24)) +#define MULTU (HI(0) | LO(25)) +#define NEG_S (HI(17) | FMT_S | LO(7)) +#define NOP (HI(0) | LO(0)) +#define NOR (HI(0) | LO(39)) +#define OR (HI(0) | LO(37)) +#define ORI (HI(13)) +#define SD (HI(63)) +#define SLT (HI(0) | LO(42)) +#define SLTI (HI(10)) +#define SLTIU (HI(11)) +#define SLTU (HI(0) | LO(43)) +#define SLL (HI(0) | LO(0)) +#define SLLV (HI(0) | LO(4)) +#define SRL (HI(0) | LO(2)) +#define SRLV (HI(0) | LO(6)) +#define SRA (HI(0) | LO(3)) +#define SRAV (HI(0) | LO(7)) +#define SUB_S (HI(17) | FMT_S | LO(1)) +#define SUBU (HI(0) | LO(35)) +#define SW (HI(43)) +#define TRUNC_W_S (HI(17) | FMT_S | LO(13)) +#define XOR (HI(0) | LO(38)) +#define XORI (HI(14)) + +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) +#define CLZ (HI(28) | LO(32)) +#define DCLZ (HI(28) | LO(36)) +#define MUL (HI(28) | LO(2)) +#define SEB (HI(31) | (16 << 6) | LO(32)) +#define SEH (HI(31) | (24 << 6) | LO(32)) +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ADDU_W ADDU +#define ADDIU_W ADDIU +#define SLL_W SLL +#define SUBU_W SUBU +#else +#define ADDU_W DADDU +#define ADDIU_W DADDIU +#define SLL_W DSLL +#define SUBU_W DSUBU +#endif + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) +{ + SLJIT_ASSERT(delay_slot == MOVABLE_INS || delay_slot >= UNMOVABLE_INS + || delay_slot == ((ins >> 11) & 0x1f) || delay_slot == ((ins >> 16) & 0x1f)); + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins invert_branch(sljit_si flags) +{ + return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16); +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return code_ptr; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + inst = (sljit_ins*)jump->addr; + if (jump->flags & IS_COND) + inst--; + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + /* B instructions. */ + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? BAL : B; + jump->addr -= sizeof(sljit_ins); + return inst; + } + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = saved_inst ^ invert_branch(jump->flags); + jump->addr -= 2 * sizeof(sljit_ins); + return inst; + } + } + else { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2; + if (diff <= SIMM_MAX && diff >= SIMM_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + inst[0] = (jump->flags & IS_JAL) ? BAL : B; + inst[1] = NOP; + return inst + 1; + } + inst[0] = inst[0] ^ invert_branch(jump->flags); + inst[1] = NOP; + jump->addr -= sizeof(sljit_ins); + return inst + 1; + } + } + + if (jump->flags & IS_COND) { + if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == ((jump->addr + 2 * sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + saved_inst = inst[0]; + inst[0] = inst[-1]; + inst[-1] = (saved_inst & 0xffff0000) | 3; + inst[1] = J; + inst[2] = NOP; + return inst + 2; + } + else if ((target_addr & ~0xfffffff) == ((jump->addr + 3 * sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (inst[0] & 0xffff0000) | 3; + inst[1] = NOP; + inst[2] = J; + inst[3] = NOP; + jump->addr += sizeof(sljit_ins); + return inst + 3; + } + } + else { + /* J instuctions. */ + if ((jump->flags & IS_MOVABLE) && (target_addr & ~0xfffffff) == (jump->addr & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = inst[-1]; + inst[-1] = (jump->flags & IS_JAL) ? JAL : J; + jump->addr -= sizeof(sljit_ins); + return inst; + } + + if ((target_addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)) { + jump->flags |= PATCH_J; + inst[0] = (jump->flags & IS_JAL) ? JAL : J; + inst[1] = NOP; + return inst + 1; + } + } + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) +keep_address: + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + if (jump->flags & IS_COND) { + inst[0] -= 4; + inst++; + } + inst[2] = inst[6]; + inst[3] = inst[7]; + return inst + 3; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + if (jump->flags & IS_COND) { + inst[0] -= 2; + inst++; + } + inst[4] = inst[6]; + inst[5] = inst[7]; + return inst + 5; + } +#endif + + return code_ptr; +} + +#ifdef __GNUC__ +static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_ptr) +{ + SLJIT_CACHE_FLUSH(code, code_ptr); +} +#endif + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 7); +#endif + code_ptr = detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)); + buf_ptr[0] |= (addr >> 2) & 0x03ffffff; + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); +#else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + } + else if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffffl); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + } + else { + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[5] = (buf_ptr[5] & 0xffff0000) | (addr & 0xffff); + } +#endif + } while (0); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); +#ifndef __GNUC__ + SLJIT_CACHE_FLUSH(code, code_ptr); +#else + /* GCC workaround for invalid code generation with -O2. */ + sljit_cache_flush(code, code_ptr); +#endif + return code; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define WRITE_BACK 0x00020 +#define ARG_TEST 0x00040 +#define ALT_KEEP_CACHE 0x00080 +#define CUMULATIVE_OP 0x00100 +#define LOGICAL_OP 0x00200 +#define IMM_OP 0x00400 +#define SRC2_IMM 0x00800 + +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 + +/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */ +#define CHECK_FLAGS(list) \ + (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define STACK_STORE SW +#define STACK_LOAD LW +#else +#define STACK_STORE SD +#define STACK_LOAD LD +#endif + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#include "sljitNativeMIPS_32.c" +#else +#include "sljitNativeMIPS_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_ins base; + sljit_si i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + local_size = (local_size + 15) & ~0xf; +#else + local_size = (local_size + 31) & ~0x1f; +#endif + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + /* Frequent case. */ + FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP))); + base = S(SLJIT_SP); + } + else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP))); + base = S(TMP_REG2); + local_size = 0; + } + + offs = local_size - (sljit_sw)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS)); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(i) | IMM(offs), MOVABLE_INS)); + } + + if (args >= 1) + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_S0), DR(SLJIT_S0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_S1), DR(SLJIT_S1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_S2), DR(SLJIT_S2))); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + compiler->local_size = (local_size + 15) & ~0xf; +#else + compiler->local_size = (local_size + 31) & ~0x1f; +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si local_size, i, tmp, offs; + sljit_ins base; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + local_size = compiler->local_size; + if (local_size <= SIMM_MAX) + base = S(SLJIT_SP); + else { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size)); + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(TMP_REG1), DR(TMP_REG1))); + base = S(TMP_REG1); + local_size = 0; + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - (sljit_si)sizeof(sljit_sw)), RETURN_ADDR_REG)); + offs = local_size - (sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(i) | IMM(offs), DR(i))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + SLJIT_ASSERT(offs == local_size - (sljit_sw)(sizeof(sljit_sw))); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + if (compiler->local_size <= SIMM_MAX) + return push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(compiler->local_size), UNMOVABLE_INS); + else + return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_SP), UNMOVABLE_INS); +} + +#undef STACK_STORE +#undef STACK_LOAD + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* u w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* u b s */ HI(40) /* sb */, +/* u b l */ HI(36) /* lbu */, +/* u h s */ HI(41) /* sh */, +/* u h l */ HI(37) /* lhu */, +/* u i s */ HI(43) /* sw */, +/* u i l */ ARCH_32_64(HI(35) /* lw */, HI(39) /* lwu */), + +/* s w s */ ARCH_32_64(HI(43) /* sw */, HI(63) /* sd */), +/* s w l */ ARCH_32_64(HI(35) /* lw */, HI(55) /* ld */), +/* s b s */ HI(40) /* sb */, +/* s b l */ HI(32) /* lb */, +/* s h s */ HI(41) /* sh */, +/* s h l */ HI(33) /* lh */, +/* s i s */ HI(43) /* sw */, +/* s i l */ HI(35) /* lw */, + +/* d s */ HI(61) /* sdc1 */, +/* d l */ HI(53) /* ldc1 */, +/* s s */ HI(57) /* swc1 */, +/* s l */ HI(49) /* lwc1 */, +}; + +#undef ARCH_32_64 + +/* reg_ar is an absoulute register! */ + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) && !(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) { + /* Works for both absoulte and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(arg & REG_MASK) + | TA(reg_ar) | IMM(argw), ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? reg_ar : MOVABLE_INS)); + return -1; + } + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + return 1; + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si tmp_ar, base, delay_slot; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) { + tmp_ar = reg_ar; + delay_slot = reg_ar; + } else { + tmp_ar = DR(TMP_REG1); + delay_slot = MOVABLE_INS; + } + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + if ((flags & WRITE_BACK) && reg_ar == DR(base)) { + SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); + FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + reg_ar = DR(TMP_REG1); + } + + /* Using the cache. */ + if (argw == compiler->cache_argw) { + if (!(flags & WRITE_BACK)) { + if (arg == compiler->cache_arg) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + } + else { + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); + } + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); + } + + if (!(flags & WRITE_BACK)) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); + } + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); + } + + if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { + /* Update only applies if a base register exists. */ + if (reg_ar == DR(base)) { + SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); + if (argw <= SIMM_MAX && argw >= SIMM_MIN) { + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS)); + if (argw) + return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)); + return SLJIT_SUCCESS; + } + FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); + reg_ar = DR(TMP_REG1); + } + + if (argw <= SIMM_MAX && argw >= SIMM_MIN) { + if (argw) + FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base))); + } + else { + if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); + } + else { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); + } + } + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + + if (compiler->cache_arg == SLJIT_MEM && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN) { + if (argw != compiler->cache_argw) + FAIL_IF(push_inst(compiler, ADDIU_W | S(TMP_REG3) | T(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + } + else { + compiler->cache_arg = SLJIT_MEM; + FAIL_IF(load_immediate(compiler, DR(TMP_REG3), argw)); + } + compiler->cache_argw = argw; + + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) { + compiler->cache_arg = arg; + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | D(TMP_REG3), DR(TMP_REG3))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + } + + FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | T(base) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_si dst_r = TMP_REG2; + sljit_si src1_r; + sljit_sw src2_r = 0; + sljit_si sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + if (GET_FLAGS(op)) + flags |= UNUSED_DEST; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if ((!(flags & LOGICAL_OP) && (src2w <= SIMM_MAX && src2w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if ((!(flags & LOGICAL_OP) && (src1w <= SIMM_MAX && src1w >= SIMM_MIN)) + || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_MAX))) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = 0; + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + dst_r = 0; + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, DR(dst_r), dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, DR(dst_r), dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + sljit_si int_op = op & SLJIT_INT_OP; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, BREAK, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LUMUL: + case SLJIT_LSMUL: +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); + case SLJIT_LUDIV: + case SLJIT_LSDIV: +#if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (int_op) + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); +#endif + + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); + return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if ((op & SLJIT_INT_OP) && GET_OPCODE(op) >= SLJIT_NOT) { + flags |= INT_DATA | SIGNED_DATA; + if (src & SLJIT_IMM) + srcw = (sljit_si)srcw; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw); +#endif + + case SLJIT_MOV_SI: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw); +#endif + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_MOVU: + case SLJIT_MOVU_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ui)srcw : srcw); +#endif + + case SLJIT_MOVU_SI: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_si)srcw : srcw); +#endif + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = 0; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op & SLJIT_INT_OP) { + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_si)src1w; + if (src2 & SLJIT_IMM) + src2w = (sljit_si)src2w; + } +#endif + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + if (src2 & SLJIT_IMM) { + if (op & SLJIT_INT_OP) + src2w &= 0x1f; + else + src2w &= 0x3f; + } +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif defined(__GNUC__) + sljit_sw fir; + asm ("cfc1 %0, $0" : "=r"(fir)); + return (fir >> 22) & 0x1; +#else +#error "FIR check is not implemented for this architecture" +#endif +} + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) +#define FMT(op) (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) << (21 - 8)) + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVW_FROMD) << 21; +#endif + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, flags ? DOUBLE_DATA : SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef is_long +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define flags 0 +#else + sljit_si flags = (GET_OPCODE(op) == SLJIT_CONVD_FROMW) << 21; +#endif + + sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTC1 | flags | T(src) | FS(TMP_FREG1), MOVABLE_INS)); + else if (src & SLJIT_MEM) { + /* Load the integer value into a VFP register. */ + FAIL_IF(emit_op_mem2(compiler, ((flags) ? DOUBLE_DATA : SINGLE_DATA) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + FAIL_IF(push_inst(compiler, MTC1 | flags | T(TMP_REG1) | FS(TMP_FREG1), MOVABLE_INS)); + } + + FAIL_IF(push_inst(compiler, CVT_S_S | flags | (4 << 21) | (((op & SLJIT_SINGLE_OP) ^ SLJIT_SINGLE_OP) >> 8) | FS(TMP_FREG1) | FD(dst_r), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef flags +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + /* src2 and src1 are swapped. */ + if (op & SLJIT_SET_E) { + FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); + } + if (op & SLJIT_SET_S) { + /* Mixing the instructions for the two checks. */ + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); + FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); + } + return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + else + src <<= 1; + + switch (GET_OPCODE(op)) { + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, MOV_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, NEG_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, ABS_S | FMT(op) | FS(src) | FD(dst_r), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, CVT_S_S | ((op & SLJIT_SINGLE_OP) ? 1 : (1 << 21)) | FS(src) | FD(dst_r), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + else + src2 <<= 1; + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, ADD_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SUB_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, MUL_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, DIV_S | FMT(op) | FT(src2) | FS(src1) | FD(dst_r), MOVABLE_INS)); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_DATA, RETURN_ADDR_REG, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | DA(RETURN_ADDR_REG), RETURN_ADDR_REG)); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, RETURN_ADDR_REG, srcw)); + + FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +#define JUMP_LENGTH 4 +#else +#define JUMP_LENGTH 8 +#endif + +#define BR_Z(src) \ + inst = BEQ | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_NZ(src) \ + inst = BNE | SA(src) | TA(0) | JUMP_LENGTH; \ + flags = IS_BIT26_COND; \ + delay_check = src; + +#define BR_T() \ + inst = BC1T | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; + +#define BR_F() \ + inst = BC1F | JUMP_LENGTH; \ + flags = IS_BIT16_COND; \ + delay_check = FCSR_FCC; + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_si flags = 0; + sljit_si delay_check = UNMOVABLE_INS; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_EQUAL: + case SLJIT_D_NOT_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + case SLJIT_D_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_D_LESS: + BR_Z(ULESS_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + BR_NZ(ULESS_FLAG); + break; + case SLJIT_GREATER: + case SLJIT_D_GREATER: + BR_Z(UGREATER_FLAG); + break; + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + BR_NZ(UGREATER_FLAG); + break; + case SLJIT_SIG_LESS: + BR_Z(LESS_FLAG); + break; + case SLJIT_SIG_GREATER_EQUAL: + BR_NZ(LESS_FLAG); + break; + case SLJIT_SIG_GREATER: + BR_Z(GREATER_FLAG); + break; + case SLJIT_SIG_LESS_EQUAL: + BR_NZ(GREATER_FLAG); + break; + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + BR_Z(OVERFLOW_FLAG); + break; + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + BR_NZ(OVERFLOW_FLAG); + break; + case SLJIT_D_UNORDERED: + BR_F(); + break; + case SLJIT_D_ORDERED: + BR_T(); + break; + default: + /* Not conditional branch. */ + inst = 0; + break; + } + + jump->flags |= flags; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != delay_check)) + jump->flags |= IS_MOVABLE; + + if (inst) + PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS)); + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + if (type <= SLJIT_JUMP) { + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + } else { + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + /* Cannot be optimized out if type is >= CALL0. */ + jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? IS_CALL : 0); + PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + jump->addr = compiler->size; + /* A NOP if type < CALL1. */ + PTR_FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS)); + } + return jump; +} + +#define RESOLVE_IMM1() \ + if (src1 & SLJIT_IMM) { \ + if (src1w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG1), src1w)); \ + src1 = TMP_REG1; \ + } \ + else \ + src1 = 0; \ + } + +#define RESOLVE_IMM2() \ + if (src2 & SLJIT_IMM) { \ + if (src2w) { \ + PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \ + src2 = TMP_REG2; \ + } \ + else \ + src2 = 0; \ + } + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_si flags; + sljit_ins inst; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + flags = ((type & SLJIT_INT_OP) ? INT_DATA : WORD_DATA) | LOAD_DATA; + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG1), src1, src1w, src2, src2w)); + src1 = TMP_REG1; + } + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0)); + src2 = TMP_REG2; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type <= SLJIT_NOT_EQUAL) { + RESOLVE_IMM1(); + RESOLVE_IMM2(); + jump->flags |= IS_BIT26_COND; + if (compiler->delay_slot == MOVABLE_INS || (compiler->delay_slot != UNMOVABLE_INS && compiler->delay_slot != DR(src1) && compiler->delay_slot != DR(src2))) + jump->flags |= IS_MOVABLE; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(src1) | T(src2) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else if (type >= SLJIT_SIG_LESS && (((src1 & SLJIT_IMM) && (src1w == 0)) || ((src2 & SLJIT_IMM) && (src2w == 0)))) { + inst = NOP; + if ((src1 & SLJIT_IMM) && (src1w == 0)) { + RESOLVE_IMM2(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_GREATER: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + } + src1 = src2; + } + else { + RESOLVE_IMM1(); + switch (type) { + case SLJIT_SIG_LESS: + inst = BGEZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER_EQUAL: + inst = BLTZ; + jump->flags |= IS_BIT16_COND; + break; + case SLJIT_SIG_GREATER: + inst = BLEZ; + jump->flags |= IS_BIT26_COND; + break; + case SLJIT_SIG_LESS_EQUAL: + inst = BGTZ; + jump->flags |= IS_BIT26_COND; + break; + } + } + PTR_FAIL_IF(push_inst(compiler, inst | S(src1) | JUMP_LENGTH, UNMOVABLE_INS)); + } + else { + if (type == SLJIT_LESS || type == SLJIT_GREATER_EQUAL || type == SLJIT_SIG_LESS || type == SLJIT_SIG_GREATER_EQUAL) { + RESOLVE_IMM1(); + if ((src2 & SLJIT_IMM) && src2w <= SIMM_MAX && src2w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src1) | T(TMP_REG1) | IMM(src2w), DR(TMP_REG1))); + else { + RESOLVE_IMM2(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src1) | T(src2) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_LESS || type == SLJIT_SIG_LESS) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + else { + RESOLVE_IMM2(); + if ((src1 & SLJIT_IMM) && src1w <= SIMM_MAX && src1w >= SIMM_MIN) + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTIU : SLTI) | S(src2) | T(TMP_REG1) | IMM(src1w), DR(TMP_REG1))); + else { + RESOLVE_IMM1(); + PTR_FAIL_IF(push_inst(compiler, (type <= SLJIT_LESS_EQUAL ? SLTU : SLT) | S(src2) | T(src1) | D(TMP_REG1), DR(TMP_REG1))); + } + type = (type == SLJIT_GREATER || type == SLJIT_SIG_GREATER) ? SLJIT_NOT_EQUAL : SLJIT_EQUAL; + } + + jump->flags |= IS_BIT26_COND; + PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | JUMP_LENGTH, UNMOVABLE_INS)); + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#undef RESOLVE_IMM1 +#undef RESOLVE_IMM2 + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_si type, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_si if_true; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (src1 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->flags |= IS_BIT16_COND; + + switch (type & 0xff) { + case SLJIT_D_EQUAL: + inst = C_UEQ_S; + if_true = 1; + break; + case SLJIT_D_NOT_EQUAL: + inst = C_UEQ_S; + if_true = 0; + break; + case SLJIT_D_LESS: + inst = C_ULT_S; + if_true = 1; + break; + case SLJIT_D_GREATER_EQUAL: + inst = C_ULT_S; + if_true = 0; + break; + case SLJIT_D_GREATER: + inst = C_ULE_S; + if_true = 0; + break; + case SLJIT_D_LESS_EQUAL: + inst = C_ULE_S; + if_true = 1; + break; + case SLJIT_D_UNORDERED: + inst = C_UN_S; + if_true = 1; + break; + default: /* Make compilers happy. */ + SLJIT_ASSERT_STOP(); + case SLJIT_D_ORDERED: + inst = C_UN_S; + if_true = 0; + break; + } + + PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS)); + /* Intentionally the other opcode. */ + PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + +#undef JUMP_LENGTH +#undef BR_Z +#undef BR_NZ +#undef BR_T +#undef BR_F + +#undef FLOAT_DATA +#undef FMT + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + sljit_si src_r = TMP_REG2; + struct sljit_jump *jump = NULL; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) { + if (DR(src) != 4) + src_r = src; + else + FAIL_IF(push_inst(compiler, ADDU_W | S(src) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + } + + if (type >= SLJIT_CALL0) { + SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2); + if (src & (SLJIT_IMM | SLJIT_MEM)) { + if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw)); + else { + SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + } + FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + /* We need an extra instruction in any case. */ + return push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), UNMOVABLE_INS); + } + + /* Register input. */ + if (type >= SLJIT_CALL1) + FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_R0) | TA(0) | DA(4), 4)); + FAIL_IF(push_inst(compiler, JALR | S(src_r) | DA(RETURN_ADDR_REG), UNMOVABLE_INS)); + return push_inst(compiler, ADDU_W | S(src_r) | TA(0) | D(PIC_ADDR_REG), UNMOVABLE_INS); + } + + if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); + jump->u.target = srcw; + + if (compiler->delay_slot != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + } + else if (src & SLJIT_MEM) + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + + FAIL_IF(push_inst(compiler, JR | S(src_r), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si sugg_dst_ar, dst_ar; + sljit_si flags = GET_ALL_FLAGS(op); +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# define mem_type WORD_DATA +#else + sljit_si mem_type = (op & SLJIT_INT_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (op == SLJIT_MOV_SI || op == SLJIT_MOV_UI) + mem_type = INT_DATA | SIGNED_DATA; +#endif + sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } + + switch (type & 0xff) { + case SLJIT_EQUAL: + case SLJIT_NOT_EQUAL: + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + break; + case SLJIT_LESS: + case SLJIT_GREATER_EQUAL: + case SLJIT_D_LESS: + case SLJIT_D_GREATER_EQUAL: + dst_ar = ULESS_FLAG; + break; + case SLJIT_GREATER: + case SLJIT_LESS_EQUAL: + case SLJIT_D_GREATER: + case SLJIT_D_LESS_EQUAL: + dst_ar = UGREATER_FLAG; + break; + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER_EQUAL: + dst_ar = LESS_FLAG; + break; + case SLJIT_SIG_GREATER: + case SLJIT_SIG_LESS_EQUAL: + dst_ar = GREATER_FLAG; + break; + case SLJIT_OVERFLOW: + case SLJIT_NOT_OVERFLOW: + dst_ar = OVERFLOW_FLAG; + break; + case SLJIT_MUL_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + type ^= 0x1; /* Flip type bit for the XORI below. */ + break; + case SLJIT_D_EQUAL: + case SLJIT_D_NOT_EQUAL: + dst_ar = EQUAL_FLAG; + break; + + case SLJIT_D_UNORDERED: + case SLJIT_D_ORDERED: + FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); + FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); + FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + break; + + default: + SLJIT_ASSERT_STOP(); + dst_ar = sugg_dst_ar; + break; + } + + if (type & 0x1) { + FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); + dst_ar = sugg_dst_ar; + } + + if (op >= SLJIT_ADD) { + if (DR(TMP_REG2) != dst_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); + + if (sugg_dst_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar); + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) +# undef mem_type +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si reg; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + return const_; +} diff --git a/src/sljit/sljitNativePPC_32.c b/src/sljit/sljitNativePPC_32.c new file mode 100644 index 0000000..b14b75c --- /dev/null +++ b/src/sljit/sljitNativePPC_32.c @@ -0,0 +1,269 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 32-bit arch dependent functions. */ + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; +} + +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLWINM | S(src) | A(dst) | ((from) << 6) | (31 << 1)) + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_si src2) +{ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); + return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | D(0))); + FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, MTXER | S(0)); + } + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & (ALT_FORM2 | ALT_FORM3)) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (flags & ALT_FORM3) + return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); + return SLJIT_SUCCESS; + } + if (flags & (ALT_FORM4 | ALT_FORM5)) { + if (flags & ALT_FORM4) + FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + return SLJIT_SUCCESS; + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + if (flags & ALT_FORM6) + FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); + return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | D(0))); + FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); + return push_inst(compiler, MTXER | S(0)); + } + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + return push_inst(compiler, SLW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM3) + FAIL_IF(push_inst(compiler, MFXER | D(0))); + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + compiler->imm &= 0x1f; + FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); + } + else + FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2))); + return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/src/sljit/sljitNativePPC_64.c b/src/sljit/sljitNativePPC_64.c new file mode 100644 index 0000000..182ac7b --- /dev/null +++ b/src/sljit/sljitNativePPC_64.c @@ -0,0 +1,421 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ppc 64-bit arch dependent functions. */ + +#if defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +#define ASM_SLJIT_CLZ(src, dst) \ + __asm__ volatile ( "cntlzd %0, %1" : "=r"(dst) : "r"(src) ) +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements" +#else +#error "Must implement count leading zeroes" +#endif + +#define RLDI(dst, src, sh, mb, type) \ + (HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20)) + +#define PUSH_RLDICR(reg, shift) \ + push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1)) + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +{ + sljit_uw tmp; + sljit_uw shift; + sljit_uw tmp2; + sljit_uw shift2; + + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(TMP_ZERO) | A(reg) | IMM(imm)); + + if (imm <= 0x7fffffffl && imm >= -0x80000000l) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; + } + + /* Count leading zeroes. */ + tmp = (imm >= 0) ? imm : ~imm; + ASM_SLJIT_CLZ(tmp, shift); + SLJIT_ASSERT(shift > 0); + shift--; + tmp = (imm << shift); + + if ((tmp & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift += 15; + return PUSH_RLDICR(reg, shift); + } + + if ((tmp & ~0xffffffff00000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32))); + shift += 31; + return PUSH_RLDICR(reg, shift); + } + + /* Cut out the 16 bit from immediate. */ + shift += 15; + tmp2 = imm & ((1ul << (63 - shift)) - 1); + + if (tmp2 <= 0xffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2); + } + + if (tmp2 <= 0xffffffff) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16))); + return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS; + } + + ASM_SLJIT_CLZ(tmp2, shift2); + tmp2 <<= shift2; + + if ((tmp2 & ~0xffff000000000000ul) == 0) { + FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48))); + shift2 += 15; + shift += (63 - shift2); + FAIL_IF(PUSH_RLDICR(reg, shift)); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48))); + return PUSH_RLDICR(reg, shift2); + } + + /* The general version. */ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)); +} + +/* Simplified mnemonics: clrldi. */ +#define INS_CLEAR_LEFT(dst, src, from) \ + (RLDICL | S(src) | A(dst) | ((from) << 6) | (1 << 5)) + +/* Sign extension for integer operations. */ +#define UN_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG2_SOURCE)) == (ALT_SIGN_EXT | REG2_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } + +#define BIN_EXTS() \ + if (flags & ALT_SIGN_EXT) { \ + if (flags & REG1_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } \ + if (flags & REG2_SOURCE) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src2) | A(TMP_REG2))); \ + src2 = TMP_REG2; \ + } \ + } + +#define BIN_IMM_EXTS() \ + if ((flags & (ALT_SIGN_EXT | REG1_SOURCE)) == (ALT_SIGN_EXT | REG1_SOURCE)) { \ + FAIL_IF(push_inst(compiler, EXTSW | S(src1) | A(TMP_REG1))); \ + src1 = TMP_REG1; \ + } + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_si src2) +{ + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1); + if (dst != src2) + return push_inst(compiler, OR | S(src2) | A(dst) | B(src2)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SI) + return push_inst(compiler, EXTSW | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 0)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 24)); + } + else if ((flags & REG_DEST) && op == SLJIT_MOV_SB) + return push_inst(compiler, EXTSB | S(src2) | A(dst)); + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return push_inst(compiler, EXTSH | S(src2) | A(dst)); + return push_inst(compiler, INS_CLEAR_LEFT(dst, src2, 16)); + } + else { + SLJIT_ASSERT(dst == src2); + } + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2)); + + case SLJIT_NEG: + SLJIT_ASSERT(src1 == TMP_REG1); + UN_EXTS(); + return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1); + if (flags & ALT_FORM1) + return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); + + case SLJIT_ADD: + if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + BIN_IMM_EXTS(); + return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); + return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); + BIN_EXTS(); + return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + + case SLJIT_ADDC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | D(0))); + FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); + return push_inst(compiler, MTXER | S(0)); + } + BIN_EXTS(); + return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); + + case SLJIT_SUB: + if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); + } + if (flags & (ALT_FORM2 | ALT_FORM3)) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (flags & ALT_FORM3) + return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + return SLJIT_SUCCESS; + } + if (flags & (ALT_FORM4 | ALT_FORM5)) { + if (flags & ALT_FORM4) + FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (flags & ALT_FORM5) + return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); + return SLJIT_SUCCESS; + } + if (!(flags & ALT_SET_FLAGS)) + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + BIN_EXTS(); + if (flags & ALT_FORM6) + FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + + case SLJIT_SUBC: + if (flags & ALT_FORM1) { + FAIL_IF(push_inst(compiler, MFXER | D(0))); + FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); + return push_inst(compiler, MTXER | S(0)); + } + BIN_EXTS(); + return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); + + case SLJIT_MUL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); + } + BIN_EXTS(); + if (flags & ALT_FORM2) + return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1)); + + case SLJIT_AND: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ANDIS | S(src1) | A(dst) | compiler->imm); + } + return push_inst(compiler, AND | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_OR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, ORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, ORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, ORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, OR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_XOR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORI | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM2) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, XORIS | S(src1) | A(dst) | compiler->imm); + } + if (flags & ALT_FORM3) { + SLJIT_ASSERT(src2 == TMP_REG2); + FAIL_IF(push_inst(compiler, XORI | S(src1) | A(dst) | IMM(compiler->imm))); + return push_inst(compiler, XORIS | S(dst) | A(dst) | IMM(compiler->imm >> 16)); + } + return push_inst(compiler, XOR | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_SHL: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); + } + else { + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); + } + } + return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_LSHR: + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); + } + else { + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); + } + } + return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); + + case SLJIT_ASHR: + if (flags & ALT_FORM3) + FAIL_IF(push_inst(compiler, MFXER | D(0))); + if (flags & ALT_FORM1) { + SLJIT_ASSERT(src2 == TMP_REG2); + if (flags & ALT_FORM2) { + compiler->imm &= 0x1f; + FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); + } + else { + compiler->imm &= 0x3f; + FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4))); + } + } + else + FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2))); + return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si reg, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(init_value >> 48))); + FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value >> 32))); + FAIL_IF(PUSH_RLDICR(reg, 31)); + FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(init_value >> 16))); + return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff); + SLJIT_CACHE_FLUSH(inst, inst + 5); +} diff --git a/src/sljit/sljitNativePPC_common.c b/src/sljit/sljitNativePPC_common.c new file mode 100644 index 0000000..08d5356 --- /dev/null +++ b/src/sljit/sljitNativePPC_common.c @@ -0,0 +1,2374 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "PowerPC" SLJIT_CPUINFO; +} + +/* Length of an instruction word. + Both for ppc-32 and ppc-64. */ +typedef sljit_ui sljit_ins; + +#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \ + || (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define SLJIT_PPC_STACK_FRAME_V2 1 +#endif + +#ifdef _AIX +#include +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1 +#endif + +static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#ifdef _AIX + _sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from)); +#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM) +# if defined(_ARCH_PWR) || defined(_ARCH_PWR2) + /* Cache flush for POWER architecture. */ + while (from < to) { + __asm__ volatile ( + "clf 0, %0\n" + "dcs\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "ics" ); +# elif defined(_ARCH_COM) && !defined(_ARCH_PPC) +# error "Cache flush is not implemented for PowerPC/POWER common mode." +# else + /* Cache flush for PowerPC architecture. */ + while (from < to) { + __asm__ volatile ( + "dcbf 0, %0\n" + "sync\n" + "icbi 0, %0\n" + : : "r"(from) + ); + from++; + } + __asm__ volatile ( "isync" ); +# endif +# ifdef __xlc__ +# warning "This file may fail to compile if -qfuncsect is used" +# endif +#elif defined(__xlc__) +#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc" +#else +#error "This platform requires a cache flush implementation." +#endif /* _AIX */ +} + +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_ZERO (SLJIT_NUMBER_OF_REGISTERS + 5) + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +#define TMP_CALL_REG (SLJIT_NUMBER_OF_REGISTERS + 6) +#else +#define TMP_CALL_REG TMP_REG2 +#endif + +#define TMP_FREG1 (0) +#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { + 0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ +#define D(d) (reg_map[d] << 21) +#define S(s) (reg_map[s] << 21) +#define A(a) (reg_map[a] << 16) +#define B(b) (reg_map[b] << 11) +#define C(c) (reg_map[c] << 6) +#define FD(fd) ((fd) << 21) +#define FS(fs) ((fs) << 21) +#define FA(fa) ((fa) << 16) +#define FB(fb) ((fb) << 11) +#define FC(fc) ((fc) << 6) +#define IMM(imm) ((imm) & 0xffff) +#define CRD(d) ((d) << 21) + +/* Instruction bit sections. + OE and Rc flag (see ALT_SET_FLAGS). */ +#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS)) +/* Rc flag (see ALT_SET_FLAGS). */ +#define RC(flags) ((flags & ALT_SET_FLAGS) >> 10) +#define HI(opcode) ((opcode) << 26) +#define LO(opcode) ((opcode) << 1) + +#define ADD (HI(31) | LO(266)) +#define ADDC (HI(31) | LO(10)) +#define ADDE (HI(31) | LO(138)) +#define ADDI (HI(14)) +#define ADDIC (HI(13)) +#define ADDIS (HI(15)) +#define ADDME (HI(31) | LO(234)) +#define AND (HI(31) | LO(28)) +#define ANDI (HI(28)) +#define ANDIS (HI(29)) +#define Bx (HI(18)) +#define BCx (HI(16)) +#define BCCTR (HI(19) | LO(528) | (3 << 11)) +#define BLR (HI(19) | LO(16) | (0x14 << 21)) +#define CNTLZD (HI(31) | LO(58)) +#define CNTLZW (HI(31) | LO(26)) +#define CMP (HI(31) | LO(0)) +#define CMPI (HI(11)) +#define CMPL (HI(31) | LO(32)) +#define CMPLI (HI(10)) +#define CROR (HI(19) | LO(449)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) +#define EXTSB (HI(31) | LO(954)) +#define EXTSH (HI(31) | LO(922)) +#define EXTSW (HI(31) | LO(986)) +#define FABS (HI(63) | LO(264)) +#define FADD (HI(63) | LO(21)) +#define FADDS (HI(59) | LO(21)) +#define FCFID (HI(63) | LO(846)) +#define FCMPU (HI(63) | LO(0)) +#define FCTIDZ (HI(63) | LO(815)) +#define FCTIWZ (HI(63) | LO(15)) +#define FDIV (HI(63) | LO(18)) +#define FDIVS (HI(59) | LO(18)) +#define FMR (HI(63) | LO(72)) +#define FMUL (HI(63) | LO(25)) +#define FMULS (HI(59) | LO(25)) +#define FNEG (HI(63) | LO(40)) +#define FRSP (HI(63) | LO(12)) +#define FSUB (HI(63) | LO(20)) +#define FSUBS (HI(59) | LO(20)) +#define LD (HI(58) | 0) +#define LWZ (HI(32)) +#define MFCR (HI(31) | LO(19)) +#define MFLR (HI(31) | LO(339) | 0x80000) +#define MFXER (HI(31) | LO(339) | 0x10000) +#define MTCTR (HI(31) | LO(467) | 0x90000) +#define MTLR (HI(31) | LO(467) | 0x80000) +#define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) +#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) +#define MULLD (HI(31) | LO(233)) +#define MULLI (HI(7)) +#define MULLW (HI(31) | LO(235)) +#define NEG (HI(31) | LO(104)) +#define NOP (HI(24)) +#define NOR (HI(31) | LO(124)) +#define OR (HI(31) | LO(444)) +#define ORI (HI(24)) +#define ORIS (HI(25)) +#define RLDICL (HI(30)) +#define RLWINM (HI(21)) +#define SLD (HI(31) | LO(27)) +#define SLW (HI(31) | LO(24)) +#define SRAD (HI(31) | LO(794)) +#define SRADI (HI(31) | LO(413 << 1)) +#define SRAW (HI(31) | LO(792)) +#define SRAWI (HI(31) | LO(824)) +#define SRD (HI(31) | LO(539)) +#define SRW (HI(31) | LO(536)) +#define STD (HI(62) | 0) +#define STDU (HI(62) | 1) +#define STDUX (HI(31) | LO(181)) +#define STFIWX (HI(31) | LO(983)) +#define STW (HI(36)) +#define STWU (HI(37)) +#define STWUX (HI(31) | LO(183)) +#define SUBF (HI(31) | LO(40)) +#define SUBFC (HI(31) | LO(8)) +#define SUBFE (HI(31) | LO(136)) +#define SUBFIC (HI(8)) +#define XOR (HI(31) | LO(316)) +#define XORI (HI(26)) +#define XORIS (HI(27)) + +#define SIMM_MAX (0x7fff) +#define SIMM_MIN (-0x8000) +#define UIMM_MAX (0xffff) + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func) +{ + sljit_sw* ptrs; + if (func_ptr) + *func_ptr = (void*)context; + ptrs = (sljit_sw*)func; + context->addr = addr ? addr : ptrs[0]; + context->r2 = ptrs[1]; + context->r11 = ptrs[2]; +} +#endif + +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_sw extra_jump_flags; + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL)) + return 0; +#else + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return 0; +#endif + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (jump->flags & IS_CALL) + goto keep_address; +#endif + + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l; + + extra_jump_flags = 0; + if (jump->flags & IS_COND) { + if (diff <= 0x7fff && diff >= -0x8000) { + jump->flags |= PATCH_B; + return 1; + } + if (target_addr <= 0xffff) { + jump->flags |= PATCH_B | PATCH_ABS_B; + return 1; + } + extra_jump_flags = REMOVE_COND; + + diff -= sizeof(sljit_ins); + } + + if (diff <= 0x01ffffff && diff >= -0x02000000) { + jump->flags |= PATCH_B | extra_jump_flags; + return 1; + } + if (target_addr <= 0x03ffffff) { + jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; + return 1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) +keep_address: +#endif + if (target_addr <= 0x7fffffff) { + jump->flags |= PATCH_ABS32; + return 1; + } + if (target_addr <= 0x7fffffffffffl) { + jump->flags |= PATCH_ABS48; + return 1; + } +#endif + + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#else + compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins)); +#endif +#endif + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + if (detect_jump_type(jump, code_ptr, code)) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + code_ptr[-3] = code_ptr[0]; + code_ptr -= 3; +#else + if (jump->flags & PATCH_ABS32) { + code_ptr -= 3; + code_ptr[-1] = code_ptr[2]; + code_ptr[0] = code_ptr[3]; + } + else if (jump->flags & PATCH_ABS48) { + code_ptr--; + code_ptr[-1] = code_ptr[0]; + code_ptr[0] = code_ptr[1]; + /* rldicr rX,rX,32,31 -> rX,rX,16,47 */ + SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6); + code_ptr[-3] ^= 0x8422; + /* oris -> ori */ + code_ptr[-2] ^= 0x4000000; + } + else { + code_ptr[-6] = code_ptr[0]; + code_ptr -= 6; + } +#endif + if (jump->flags & REMOVE_COND) { + code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001); + code_ptr++; + jump->addr += sizeof(sljit_ins); + code_ptr[0] = Bx; + jump->flags -= IS_COND; + } + } + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins))); +#else + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); +#endif + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + if (jump->flags & PATCH_B) { + if (jump->flags & IS_COND) { + if (!(jump->flags & PATCH_ABS_B)) { + addr = addr - jump->addr; + SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); + *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); + } + else { + SLJIT_ASSERT(addr <= 0xffff); + *buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001); + } + } + else { + if (!(jump->flags & PATCH_ABS_B)) { + addr = addr - jump->addr; + SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); + *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); + } + else { + SLJIT_ASSERT(addr <= 0x03ffffff); + *buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1); + } + } + break; + } + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); +#else + if (jump->flags & PATCH_ABS32) { + SLJIT_ASSERT(addr <= 0x7fffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff); + break; + } + if (jump->flags & PATCH_ABS48) { + SLJIT_ASSERT(addr <= 0x7fffffffffff); + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff); + break; + } + buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff); + buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff); + buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff); + buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff); +#endif + } while (0); + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + SLJIT_CACHE_FLUSH(code, code_ptr); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (((sljit_sw)code_ptr) & 0x4) + code_ptr++; + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); + return code_ptr; +#else + sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); + return code_ptr; +#endif +#else + return code; +#endif +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* inp_flags: */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define INDEXED 0x02 +#define WRITE_BACK 0x04 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x08 +#define HALF_DATA 0x10 +#define INT_DATA 0x18 +#define SIGNED_DATA 0x20 +/* Separates integer and floating point registers */ +#define GPR_REG 0x3f +#define DOUBLE_DATA 0x40 + +#define MEM_MASK 0x7f + +/* Other inp_flags. */ + +#define ARG_TEST 0x000100 +/* Integer opertion and set flags -> requires exts on 64 bit systems. */ +#define ALT_SIGN_EXT 0x000200 +/* This flag affects the RC() and OERC() macros. */ +#define ALT_SET_FLAGS 0x000400 +#define ALT_KEEP_CACHE 0x000800 +#define ALT_FORM1 0x010000 +#define ALT_FORM2 0x020000 +#define ALT_FORM3 0x040000 +#define ALT_FORM4 0x080000 +#define ALT_FORM5 0x100000 +#define ALT_FORM6 0x200000 + +/* Source and destination is register. */ +#define REG_DEST 0x000001 +#define REG1_SOURCE 0x000002 +#define REG2_SOURCE 0x000004 +/* getput_arg_fast returned true. */ +#define FAST_DEST 0x000008 +/* Multiple instructions are required. */ +#define SLOW_DEST 0x000010 +/* +ALT_SIGN_EXT 0x000200 +ALT_SET_FLAGS 0x000400 +ALT_FORM1 0x010000 +... +ALT_FORM6 0x200000 */ + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#include "sljitNativePPC_32.c" +#else +#include "sljitNativePPC_64.c" +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define STACK_STORE STW +#define STACK_LOAD LWZ +#else +#define STACK_STORE STD +#define STACK_LOAD LD +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + FAIL_IF(push_inst(compiler, MFLR | D(0))); + offs = -(sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + offs -= (sljit_si)(sizeof(sljit_sw)); + FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs))); + } + + SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1)); + +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); +#else + FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); +#endif + + FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0)); + if (args >= 1) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0))); + if (args >= 2) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1))); + if (args >= 3) + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2))); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + local_size = (local_size + 15) & ~0xf; + compiler->local_size = local_size; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } +#else + if (local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, -local_size)); + FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } +#endif + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET; + compiler->local_size = (local_size + 15) & ~0xf; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si i, tmp, offs; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + if (compiler->local_size <= SIMM_MAX) + FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size))); + else { + FAIL_IF(load_immediate(compiler, 0, compiler->local_size)); + FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0))); + } + +#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw)))); +#else + FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw)))); +#endif + + offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1); + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs))); + offs += (sljit_si)(sizeof(sljit_sw)); + } + + FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs))); + SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw))); + + FAIL_IF(push_inst(compiler, MTLR | S(0))); + FAIL_IF(push_inst(compiler, BLR)); + + return SLJIT_SUCCESS; +} + +#undef STACK_STORE +#undef STACK_LOAD + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* i/x - immediate/indexed form + n/w - no write-back / write-back (1 bit) + s/l - store/load (1 bit) + u/s - signed/unsigned (1 bit) + w/b/h/i - word/byte/half/int allowed (2 bit) + It contans 32 items, but not all are different. */ + +/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */ +#define INT_ALIGNED 0x10000 +/* 64-bit only: there is no lwau instruction. */ +#define UPDATE_REQ 0x20000 + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) +#define ARCH_32_64(a, b) a +#define INST_CODE_AND_DST(inst, flags, reg) \ + ((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#else +#define ARCH_32_64(a, b) b +#define INST_CODE_AND_DST(inst, flags, reg) \ + (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) +#endif + +static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = { + +/* -------- Unsigned -------- */ + +/* Word. */ + +/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* Byte. */ + +/* u b n i s */ HI(38) /* stb */, +/* u b n i l */ HI(34) /* lbz */, +/* u b n x s */ HI(31) | LO(215) /* stbx */, +/* u b n x l */ HI(31) | LO(87) /* lbzx */, + +/* u b w i s */ HI(39) /* stbu */, +/* u b w i l */ HI(35) /* lbzu */, +/* u b w x s */ HI(31) | LO(247) /* stbux */, +/* u b w x l */ HI(31) | LO(119) /* lbzux */, + +/* Half. */ + +/* u h n i s */ HI(44) /* sth */, +/* u h n i l */ HI(40) /* lhz */, +/* u h n x s */ HI(31) | LO(407) /* sthx */, +/* u h n x l */ HI(31) | LO(279) /* lhzx */, + +/* u h w i s */ HI(45) /* sthu */, +/* u h w i l */ HI(41) /* lhzu */, +/* u h w x s */ HI(31) | LO(439) /* sthux */, +/* u h w x l */ HI(31) | LO(311) /* lhzux */, + +/* Int. */ + +/* u i n i s */ HI(36) /* stw */, +/* u i n i l */ HI(32) /* lwz */, +/* u i n x s */ HI(31) | LO(151) /* stwx */, +/* u i n x l */ HI(31) | LO(23) /* lwzx */, + +/* u i w i s */ HI(37) /* stwu */, +/* u i w i l */ HI(33) /* lwzu */, +/* u i w x s */ HI(31) | LO(183) /* stwux */, +/* u i w x l */ HI(31) | LO(55) /* lwzux */, + +/* -------- Signed -------- */ + +/* Word. */ + +/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */), +/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */), +/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */), +/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */), + +/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */), +/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */), +/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */), +/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */), + +/* Byte. */ + +/* s b n i s */ HI(38) /* stb */, +/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */, +/* s b n x s */ HI(31) | LO(215) /* stbx */, +/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */, + +/* s b w i s */ HI(39) /* stbu */, +/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */, +/* s b w x s */ HI(31) | LO(247) /* stbux */, +/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */, + +/* Half. */ + +/* s h n i s */ HI(44) /* sth */, +/* s h n i l */ HI(42) /* lha */, +/* s h n x s */ HI(31) | LO(407) /* sthx */, +/* s h n x l */ HI(31) | LO(343) /* lhax */, + +/* s h w i s */ HI(45) /* sthu */, +/* s h w i l */ HI(43) /* lhau */, +/* s h w x s */ HI(31) | LO(439) /* sthux */, +/* s h w x l */ HI(31) | LO(375) /* lhaux */, + +/* Int. */ + +/* s i n i s */ HI(36) /* stw */, +/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */), +/* s i n x s */ HI(31) | LO(151) /* stwx */, +/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */), + +/* s i w i s */ HI(37) /* stwu */, +/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */), +/* s i w x s */ HI(31) | LO(183) /* stwux */, +/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */), + +/* -------- Double -------- */ + +/* d n i s */ HI(54) /* stfd */, +/* d n i l */ HI(50) /* lfd */, +/* d n x s */ HI(31) | LO(727) /* stfdx */, +/* d n x l */ HI(31) | LO(599) /* lfdx */, + +/* s n i s */ HI(52) /* stfs */, +/* s n i l */ HI(48) /* lfs */, +/* s n x s */ HI(31) | LO(663) /* stfsx */, +/* s n x l */ HI(31) | LO(535) /* lfsx */, + +}; + +#undef ARCH_32_64 + +/* Simple cases, (no caching is required). */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + sljit_ins inst; + + /* Should work when (arg & REG_MASK) == 0. */ + SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0); + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (arg & OFFS_REG_MASK) { + if (argw & 0x3) + return 0; + if (inp_flags & ARG_TEST) + return 1; + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg)))); + return -1; + } + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) + inp_flags &= ~WRITE_BACK; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ)); + + if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ)) + return 0; + if (inp_flags & ARG_TEST) + return 1; +#endif + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + if (argw > SIMM_MAX || argw < SIMM_MIN) + return 0; + if (inp_flags & ARG_TEST) + return 1; + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); +#endif + + FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw))); + return -1; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those operator always + uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_sw high_short, next_high_short; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_sw diff; +#endif + + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + if (arg & OFFS_REG_MASK) + return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3)); + + if (next_arg & OFFS_REG_MASK) + return 0; + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff; + next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + return high_short == next_high_short; +#else + if (argw <= 0x7fffffffl && argw >= -0x80000000l) { + high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff; + next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + if (high_short == next_high_short) + return 1; + } + + diff = argw - next_argw; + if (!(arg & REG_MASK)) + return diff <= SIMM_MAX && diff >= SIMM_MIN; + + if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN) + return 1; + + return 0; +#endif +} + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define ADJUST_CACHED_IMM(imm) \ + if ((inst & INT_ALIGNED) && (imm & 0x3)) { \ + /* Adjust cached value. Fortunately this is really a rare case */ \ + compiler->cache_argw += imm & 0x3; \ + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \ + imm &= ~0x3; \ + } +#endif + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si tmp_r; + sljit_ins inst; + sljit_sw high_short, next_high_short; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_sw diff; +#endif + + SLJIT_ASSERT(arg & SLJIT_MEM); + + tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1; + /* Special case for "mov reg, [reg, ... ]". */ + if ((arg & REG_MASK) == tmp_r) + tmp_r = TMP_REG1; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + /* Otherwise getput_arg_fast would capture it. */ + SLJIT_ASSERT(argw); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw) + tmp_r = TMP_REG3; + else { + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1))); +#endif + } + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r)); + } + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) + inp_flags &= ~WRITE_BACK; + + inst = data_transfer_insts[inp_flags & MEM_MASK]; + SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (argw <= 0x7fff7fffl && argw >= -0x80000000l + && (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) { +#endif + + arg &= REG_MASK; + high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff; + /* The getput_arg_fast should handle this otherwise. */ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l); +#else + SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ))); +#endif + + if (inp_flags & WRITE_BACK) { + if (arg == reg) { + FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg))); + reg = tmp_r; + } + tmp_r = arg; + FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16))); + } + else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) { + if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) { + next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff; + if (high_short == next_high_short) { + compiler->cache_arg = SLJIT_MEM | arg; + compiler->cache_argw = high_short; + tmp_r = TMP_REG3; + } + } + FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16))); + } + else + tmp_r = TMP_REG3; + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw)); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + } + + /* Everything else is PPC-64 only. */ + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + diff = argw - compiler->cache_argw; + if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + ADJUST_CACHED_IMM(diff); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff)); + } + + diff = argw - next_argw; + if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r)); + } + + diff = argw - compiler->cache_argw; + if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ)); + ADJUST_CACHED_IMM(diff); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff)); + } + + if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + if (compiler->cache_argw != argw) { + FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff))); + compiler->cache_argw = argw; + } + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3)); + } + + if (argw == next_argw && (next_arg & SLJIT_MEM)) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3)); + } + + diff = argw - next_argw; + if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK))); + + compiler->cache_arg = arg; + compiler->cache_argw = argw; + + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3)); + } + + if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) { + SLJIT_ASSERT(inp_flags & LOAD_DATA); + FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + + compiler->cache_arg = SLJIT_IMM; + compiler->cache_argw = argw; + tmp_r = TMP_REG3; + } + else + FAIL_IF(load_immediate(compiler, tmp_r, argw)); + + /* Get the indexed version instead of the normal one. */ + inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK]; + SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ))); + return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r)); +#endif +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_si dst_r; + sljit_si src1_r; + sljit_si src2_r; + sljit_si sugg_src2_r = TMP_REG2; + sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); + + if (!(input_flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + /* Destination check. */ + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + dst_r = TMP_REG2; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else { + SLJIT_ASSERT(dst & SLJIT_MEM); + if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) { + flags |= FAST_DEST; + dst_r = TMP_REG2; + } + else { + flags |= SLOW_DEST; + dst_r = 0; + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } + else if (src1 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1_r = TMP_REG1; + } + else + src1_r = 0; + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { + FAIL_IF(compiler->error); + src2_r = sugg_src2_r; + } + else + src2_r = 0; + + /* src1_r, src2_r and dst_r can be zero (=unprocessed). + All arguments are complex addressing modes, and it is a binary operator. */ + if (src1_r == 0 && src2_r == 0 && dst_r == 0) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + src1_r = TMP_REG1; + src2_r = TMP_REG2; + } + else if (src1_r == 0 && src2_r == 0) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + src1_r = TMP_REG1; + } + else if (src1_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + src1_r = TMP_REG1; + } + else if (src2_r == 0 && dst_r == 0) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + src2_r = sugg_src2_r; + } + + if (dst_r == 0) + dst_r = TMP_REG2; + + if (src1_r == 0) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); + src1_r = TMP_REG1; + } + + if (src2_r == 0) { + FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); + src2_r = sugg_src2_r; + } + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (flags & (FAST_DEST | SLOW_DEST)) { + if (flags & FAST_DEST) + FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw)); + else + FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0)); + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + sljit_si int_op = op & SLJIT_INT_OP; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + case SLJIT_NOP: + return push_inst(compiler, NOP); + case SLJIT_LUMUL: + case SLJIT_LSMUL: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LUMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); +#endif + case SLJIT_LUDIV: + case SLJIT_LSDIV: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (int_op) { + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); + } else { + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); + } + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); +#else + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); + return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); +#endif + } + + return SLJIT_SUCCESS; +} + +#define EMIT_MOV(type, type_flags, type_cast) \ + emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_si op_flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + op = GET_OPCODE(op); + if ((src & SLJIT_IMM) && srcw == 0) + src = TMP_ZERO; + + if (op_flags & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + + if (op_flags & SLJIT_INT_OP) { + if (op < SLJIT_NOT) { + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) + op = SLJIT_MOV_UI; + if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) + op = SLJIT_MOVU_UI; + if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) + op = SLJIT_MOV_SI; + if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) + op = SLJIT_MOVU_SI; +#endif + } +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + else { + /* Most operations expect sign extended arguments. */ + flags |= INT_DATA | SIGNED_DATA; + if (src & SLJIT_IMM) + srcw = (sljit_si)srcw; + } +#endif + } + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: +#endif + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOV_UI: + return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui)); + + case SLJIT_MOV_SI: + return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si)); +#endif + + case SLJIT_MOV_UB: + return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub)); + + case SLJIT_MOV_SB: + return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb)); + + case SLJIT_MOV_UH: + return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh)); + + case SLJIT_MOV_SH: + return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh)); + + case SLJIT_MOVU: + case SLJIT_MOVU_P: +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + case SLJIT_MOVU_UI: + case SLJIT_MOVU_SI: +#endif + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + case SLJIT_MOVU_UI: + return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui)); + + case SLJIT_MOVU_SI: + return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si)); +#endif + + case SLJIT_MOVU_UB: + return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub)); + + case SLJIT_MOVU_SB: + return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb)); + + case SLJIT_MOVU_UH: + return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh)); + + case SLJIT_MOVU_SH: + return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh)); + + case SLJIT_NOT: + return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_CLZ: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw); +#else + return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw); +#endif + } + + return SLJIT_SUCCESS; +} + +#undef EMIT_MOV + +#define TEST_SL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN) + +#define TEST_UL_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l) +#else +#define TEST_SH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & 0xffff)) +#endif + +#define TEST_UH_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l) +#else +#define TEST_ADD_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_UI_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) +#else +#define TEST_UI_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + if ((src1 & SLJIT_IMM) && src1w == 0) + src1 = TMP_ZERO; + if ((src2 & SLJIT_IMM) && src2w == 0) + src2 = TMP_ZERO; + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) { + /* Most operations expect sign extended arguments. */ + flags |= INT_DATA | SIGNED_DATA; + if (src1 & SLJIT_IMM) + src1w = (sljit_si)(src1w); + if (src2 & SLJIT_IMM) + src2w = (sljit_si)(src2w); + if (GET_FLAGS(op)) + flags |= ALT_SIGN_EXT; + } +#endif + if (op & SLJIT_SET_O) + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); + if (src2 == TMP_REG2) + flags |= ALT_KEEP_CACHE; + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src1, src1w)) { + compiler->imm = (src1w >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_ADD_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_ADDC: + return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_SH_IMM(src2, -src2w)) { + compiler->imm = ((-src2w) >> 16) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + /* Range between -1 and -32768 is covered above. */ + if (TEST_ADD_IMM(src2, -src2w)) { + compiler->imm = -src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { + if (!(op & SLJIT_SET_U)) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) { + compiler->imm = src2w; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + } + if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) { + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + } + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUBC: + return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) + flags |= ALT_FORM2; +#endif + if (!GET_FLAGS(op)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + /* Commutative unsigned operations. */ + if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UL_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src2, src2w)) { + compiler->imm = (src2w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UH_IMM(src1, src1w)) { + compiler->imm = (src1w >> 16) & 0xffff; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { + if (TEST_UI_IMM(src2, src2w)) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_UI_IMM(src1, src1w)) { + compiler->imm = src1w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); + } + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_ASHR: + if (op & SLJIT_KEEP_FLAGS) + flags |= ALT_FORM3; + /* Fall through. */ + case SLJIT_SHL: + case SLJIT_LSHR: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) + flags |= ALT_FORM2; +#endif + if (src2 & SLJIT_IMM) { + compiler->imm = src2w; + return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif +} + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw)) + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw)) +#else +#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw)) +#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw)) +#endif + +#endif /* SLJIT_CONFIG_PPC_64 */ + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + /* We can ignore the temporary data store on the stack from caching point of view. */ + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + op = GET_OPCODE(op); + FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (op == SLJIT_CONVW_FROMD) { + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); + } + return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0); + } + +#else + FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; +#endif + + if (FAST_IS_REG(dst)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1))); + return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0); + } + + SLJIT_ASSERT(dst & SLJIT_MEM); + + if (dst & OFFS_REG_MASK) { + dstw &= 0x3; + if (dstw) { +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1))); +#endif + dstw = TMP_REG1; + } + else + dstw = OFFS_REG(dst); + } + else { + if ((dst & REG_MASK) && !dstw) { + dstw = dst & REG_MASK; + dst = 0; + } + else { + /* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */ + FAIL_IF(load_immediate(compiler, TMP_REG1, dstw)); + dstw = TMP_REG1; + } + } + + return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw)); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_IMM) { + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) { + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1))); + else + FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw)); + } + else + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + + FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#else + + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + sljit_si invert_sign = 1; + + if (src & SLJIT_IMM) { + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000)); + src = TMP_REG1; + invert_sign = 0; + } + else if (!FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + src = TMP_REG1; + } + + /* First, a special double floating point value is constructed: (2^53 + (input xor (2^31))) + The double precision format has exactly 53 bit precision, so the lower 32 bit represents + the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000 + to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating + point value, we need to substract 2^53 + 2^31 from the constructed value. */ + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330)); + if (invert_sign) + FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI)); + FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW)); + + FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2))); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + if (op & SLJIT_SINGLE_OP) + return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r)); + return SLJIT_SUCCESS; + +#endif +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + + return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + + switch (GET_OPCODE(op)) { + case SLJIT_CONVD_FROMS: + op ^= SLJIT_SINGLE_OP; + if (op & SLJIT_SINGLE_OP) { + FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src))); + break; + } + /* Fall through. */ + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) + FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src))); + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src))); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src))); + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= ALT_FORM1; + } + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= ALT_FORM2; + } + + if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & ALT_FORM1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & ALT_FORM2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & ALT_FORM1) + src1 = TMP_FREG1; + if (flags & ALT_FORM2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2))); + break; + + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */)); + break; + + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2))); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef SELECT_FOP + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, MFLR | D(dst)); + + /* Memory. */ + FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2))); + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, MTLR | S(src))); + else { + if (src & SLJIT_MEM) + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2))); + } + return push_inst(compiler, BLR); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +static sljit_ins get_bo_bi_flags(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + return (12 << 21) | (2 << 16); + + case SLJIT_NOT_EQUAL: + return (4 << 21) | (2 << 16); + + case SLJIT_LESS: + case SLJIT_D_LESS: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + return (4 << 21) | ((4 + 1) << 16); + + case SLJIT_SIG_LESS: + return (12 << 21) | (0 << 16); + + case SLJIT_SIG_GREATER_EQUAL: + return (4 << 21) | (0 << 16); + + case SLJIT_SIG_GREATER: + return (12 << 21) | (1 << 16); + + case SLJIT_SIG_LESS_EQUAL: + return (4 << 21) | (1 << 16); + + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + return (12 << 21) | (3 << 16); + + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + return (4 << 21) | (3 << 16); + + case SLJIT_D_EQUAL: + return (12 << 21) | ((4 + 2) << 16); + + case SLJIT_D_NOT_EQUAL: + return (4 << 21) | ((4 + 2) << 16); + + case SLJIT_D_UNORDERED: + return (12 << 21) | ((4 + 3) << 16); + + case SLJIT_D_ORDERED: + return (4 << 21) | ((4 + 3) << 16); + + default: + SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3); + return (20 << 21); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + sljit_ins bo_bi_flags; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + bo_bi_flags = get_bo_bi_flags(type & 0xff); + if (!bo_bi_flags) + return NULL; + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + /* In PPC, we don't need to touch the arguments. */ + if (type < SLJIT_JUMP) + jump->flags |= IS_COND; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) + jump->flags |= IS_CALL; +#endif + + PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG))); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0))); + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_si src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) { +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) { + FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src))); + src_r = TMP_CALL_REG; + } + else + src_r = src; +#else + src_r = src; +#endif + } else if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; +#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) + if (type >= SLJIT_CALL0) + jump->flags |= IS_CALL; +#endif + FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0)); + src_r = TMP_CALL_REG; + } + else { + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw)); + src_r = TMP_CALL_REG; + } + + FAIL_IF(push_inst(compiler, MTCTR | S(src_r))); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); +} + +/* Get a bit from CR, all other bits are zeroed. */ +#define GET_CR_BIT(bit, dst) \ + FAIL_IF(push_inst(compiler, MFCR | D(dst))); \ + FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); + +#define INVERT_BIT(dst) \ + FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si reg, input_flags; + sljit_si flags = GET_ALL_FLAGS(op); + sljit_sw original_dstw = dstw; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { + ADJUST_LOCAL_OFFSET(src, srcw); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA; +#else + input_flags = WORD_DATA; +#endif + FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } + + switch (type & 0xff) { + case SLJIT_EQUAL: + GET_CR_BIT(2, reg); + break; + + case SLJIT_NOT_EQUAL: + GET_CR_BIT(2, reg); + INVERT_BIT(reg); + break; + + case SLJIT_LESS: + case SLJIT_D_LESS: + GET_CR_BIT(4 + 0, reg); + break; + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + GET_CR_BIT(4 + 0, reg); + INVERT_BIT(reg); + break; + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + GET_CR_BIT(4 + 1, reg); + break; + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + GET_CR_BIT(4 + 1, reg); + INVERT_BIT(reg); + break; + + case SLJIT_SIG_LESS: + GET_CR_BIT(0, reg); + break; + + case SLJIT_SIG_GREATER_EQUAL: + GET_CR_BIT(0, reg); + INVERT_BIT(reg); + break; + + case SLJIT_SIG_GREATER: + GET_CR_BIT(1, reg); + break; + + case SLJIT_SIG_LESS_EQUAL: + GET_CR_BIT(1, reg); + INVERT_BIT(reg); + break; + + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + GET_CR_BIT(3, reg); + break; + + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + GET_CR_BIT(3, reg); + INVERT_BIT(reg); + break; + + case SLJIT_D_EQUAL: + GET_CR_BIT(4 + 2, reg); + break; + + case SLJIT_D_NOT_EQUAL: + GET_CR_BIT(4 + 2, reg); + INVERT_BIT(reg); + break; + + case SLJIT_D_UNORDERED: + GET_CR_BIT(4 + 3, reg); + break; + + case SLJIT_D_ORDERED: + GET_CR_BIT(4 + 3, reg); + INVERT_BIT(reg); + break; + + default: + SLJIT_ASSERT_STOP(); + break; + } + + if (op < SLJIT_ADD) { +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op == SLJIT_MOV) + input_flags = WORD_DATA; + else { + op = SLJIT_MOV_UI; + input_flags = INT_DATA; + } +#else + op = SLJIT_MOV; + input_flags = WORD_DATA; +#endif + if (reg != TMP_REG2) + return SLJIT_SUCCESS; + return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + } + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si reg; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + return const_; +} diff --git a/src/sljit/sljitNativeSPARC_32.c b/src/sljit/sljitNativeSPARC_32.c new file mode 100644 index 0000000..4a2e629 --- /dev/null +++ b/src/sljit/sljitNativeSPARC_32.c @@ -0,0 +1,164 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw imm) +{ + if (imm <= SIMM_MAX && imm >= SIMM_MIN) + return push_inst(compiler, OR | D(dst) | S1(0) | IMM(imm), DR(dst)); + + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((imm >> 10) & 0x3fffff), DR(dst))); + return (imm & 0x3ff) ? push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (imm & 0x3ff), DR(dst)) : SLJIT_SUCCESS; +} + +#define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2)) + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_si src1, sljit_sw src2) +{ + SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same); + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst)); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_UB) + return push_inst(compiler, AND | D(dst) | S1(src2) | IMM(0xff), DR(dst)); + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst))); + return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst))); + return push_inst(compiler, (op == SLJIT_MOV_SH ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); + } + else if (dst != src2) + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */ + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS))); + FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); + + /* Loop. */ + FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); + FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS)); + + case SLJIT_ADD: + return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_ADDC: + return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUB: + return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SUBC: + return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_MUL: + FAIL_IF(push_inst(compiler, SMUL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + if (!(flags & SET_FLAGS)) + return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(dst) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, RDY | D(TMP_LINK), DR(TMP_LINK))); + return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS); + + case SLJIT_AND: + return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_OR: + return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_XOR: + return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); + + case SLJIT_SHL: + FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_LSHR: + FAIL_IF(push_inst(compiler, SRL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + + case SLJIT_ASHR: + FAIL_IF(push_inst(compiler, SRA | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst))); + return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw init_value) +{ + FAIL_IF(push_inst(compiler, SETHI | D(dst) | ((init_value >> 10) & 0x3fffff), DR(dst))); + return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins*)addr; + + inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); + SLJIT_CACHE_FLUSH(inst, inst + 2); +} diff --git a/src/sljit/sljitNativeSPARC_common.c b/src/sljit/sljitNativeSPARC_common.c new file mode 100644 index 0000000..0b1927a --- /dev/null +++ b/src/sljit/sljitNativeSPARC_common.c @@ -0,0 +1,1430 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "SPARC" SLJIT_CPUINFO; +} + +/* Length of an instruction word + Both for sparc-32 and sparc-64 */ +typedef sljit_ui sljit_ins; + +static void sparc_cache_flush(sljit_ins *from, sljit_ins *to) +{ +#if defined(__SUNPRO_C) && __SUNPRO_C < 0x590 + __asm ( + /* if (from == to) return */ + "cmp %i0, %i1\n" + "be .leave\n" + "nop\n" + + /* loop until from >= to */ + ".mainloop:\n" + "flush %i0\n" + "add %i0, 8, %i0\n" + "cmp %i0, %i1\n" + "bcs .mainloop\n" + "nop\n" + + /* The comparison was done above. */ + "bne .leave\n" + /* nop is not necessary here, since the + sub operation has no side effect. */ + "sub %i0, 4, %i0\n" + "flush %i0\n" + ".leave:" + ); +#else + if (SLJIT_UNLIKELY(from == to)) + return; + + do { + __asm__ volatile ( + "flush %0\n" + : : "r"(from) + ); + /* Operates at least on doubleword. */ + from += 2; + } while (from < to); + + if (from == to) { + /* Flush the last word. */ + from --; + __asm__ volatile ( + "flush %0\n" + : : "r"(from) + ); + } +#endif +} + +/* TMP_REG2 is not used by getput_arg */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) +#define TMP_LINK (SLJIT_NUMBER_OF_REGISTERS + 5) + +#define TMP_FREG1 (0) +#define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { + 0, 8, 9, 10, 13, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 11, 12, 15 +}; + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define D(d) (reg_map[d] << 25) +#define DA(d) ((d) << 25) +#define S1(s1) (reg_map[s1] << 14) +#define S2(s2) (reg_map[s2]) +#define S1A(s1) ((s1) << 14) +#define S2A(s2) (s2) +#define IMM_ARG 0x2000 +#define DOP(op) ((op) << 5) +#define IMM(imm) (((imm) & 0x1fff) | IMM_ARG) + +#define DR(dr) (reg_map[dr]) +#define OPC1(opcode) ((opcode) << 30) +#define OPC2(opcode) ((opcode) << 22) +#define OPC3(opcode) ((opcode) << 19) +#define SET_FLAGS OPC3(0x10) + +#define ADD (OPC1(0x2) | OPC3(0x00)) +#define ADDC (OPC1(0x2) | OPC3(0x08)) +#define AND (OPC1(0x2) | OPC3(0x01)) +#define ANDN (OPC1(0x2) | OPC3(0x05)) +#define CALL (OPC1(0x1)) +#define FABSS (OPC1(0x2) | OPC3(0x34) | DOP(0x09)) +#define FADDD (OPC1(0x2) | OPC3(0x34) | DOP(0x42)) +#define FADDS (OPC1(0x2) | OPC3(0x34) | DOP(0x41)) +#define FCMPD (OPC1(0x2) | OPC3(0x35) | DOP(0x52)) +#define FCMPS (OPC1(0x2) | OPC3(0x35) | DOP(0x51)) +#define FDIVD (OPC1(0x2) | OPC3(0x34) | DOP(0x4e)) +#define FDIVS (OPC1(0x2) | OPC3(0x34) | DOP(0x4d)) +#define FDTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd2)) +#define FDTOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc6)) +#define FITOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc8)) +#define FITOS (OPC1(0x2) | OPC3(0x34) | DOP(0xc4)) +#define FMOVS (OPC1(0x2) | OPC3(0x34) | DOP(0x01)) +#define FMULD (OPC1(0x2) | OPC3(0x34) | DOP(0x4a)) +#define FMULS (OPC1(0x2) | OPC3(0x34) | DOP(0x49)) +#define FNEGS (OPC1(0x2) | OPC3(0x34) | DOP(0x05)) +#define FSTOD (OPC1(0x2) | OPC3(0x34) | DOP(0xc9)) +#define FSTOI (OPC1(0x2) | OPC3(0x34) | DOP(0xd1)) +#define FSUBD (OPC1(0x2) | OPC3(0x34) | DOP(0x46)) +#define FSUBS (OPC1(0x2) | OPC3(0x34) | DOP(0x45)) +#define JMPL (OPC1(0x2) | OPC3(0x38)) +#define NOP (OPC1(0x0) | OPC2(0x04)) +#define OR (OPC1(0x2) | OPC3(0x02)) +#define ORN (OPC1(0x2) | OPC3(0x06)) +#define RDY (OPC1(0x2) | OPC3(0x28) | S1A(0)) +#define RESTORE (OPC1(0x2) | OPC3(0x3d)) +#define SAVE (OPC1(0x2) | OPC3(0x3c)) +#define SETHI (OPC1(0x0) | OPC2(0x04)) +#define SLL (OPC1(0x2) | OPC3(0x25)) +#define SLLX (OPC1(0x2) | OPC3(0x25) | (1 << 12)) +#define SRA (OPC1(0x2) | OPC3(0x27)) +#define SRAX (OPC1(0x2) | OPC3(0x27) | (1 << 12)) +#define SRL (OPC1(0x2) | OPC3(0x26)) +#define SRLX (OPC1(0x2) | OPC3(0x26) | (1 << 12)) +#define SUB (OPC1(0x2) | OPC3(0x04)) +#define SUBC (OPC1(0x2) | OPC3(0x0c)) +#define TA (OPC1(0x2) | OPC3(0x3a) | (8 << 25)) +#define WRY (OPC1(0x2) | OPC3(0x30) | DA(0)) +#define XOR (OPC1(0x2) | OPC3(0x03)) +#define XNOR (OPC1(0x2) | OPC3(0x07)) + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define MAX_DISP (0x1fffff) +#define MIN_DISP (-0x200000) +#define DISP_MASK (0x3fffff) + +#define BICC (OPC1(0x0) | OPC2(0x2)) +#define FBFCC (OPC1(0x0) | OPC2(0x6)) +#define SLL_W SLL +#define SDIV (OPC1(0x2) | OPC3(0x0f)) +#define SMUL (OPC1(0x2) | OPC3(0x0b)) +#define UDIV (OPC1(0x2) | OPC3(0x0e)) +#define UMUL (OPC1(0x2) | OPC3(0x0a)) +#else +#define SLL_W SLLX +#endif + +#define SIMM_MAX (0x0fff) +#define SIMM_MIN (-0x1000) + +/* dest_reg is the absolute name of the register + Useful for reordering instructions in the delay slot. */ +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_si delay_slot) +{ + sljit_ins *ptr; + SLJIT_ASSERT((delay_slot & DST_INS_MASK) == UNMOVABLE_INS + || (delay_slot & DST_INS_MASK) == MOVABLE_INS + || (delay_slot & DST_INS_MASK) == ((ins >> 25) & 0x1f)); + ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + compiler->delay_slot = delay_slot; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + inst = (sljit_ins*)jump->addr; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (jump->flags & IS_CALL) { + /* Call is always patchable on sparc 32. */ + jump->flags |= PATCH_CALL; + if (jump->flags & IS_MOVABLE) { + inst[0] = inst[-1]; + inst[-1] = CALL; + jump->addr -= sizeof(sljit_ins); + return inst; + } + inst[0] = CALL; + inst[1] = NOP; + return inst + 1; + } +#else + /* Both calls and BPr instructions shall not pass this point. */ +#error "Implementation required" +#endif + + if (jump->flags & IS_COND) + inst--; + + if (jump->flags & IS_MOVABLE) { + diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2; + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + inst--; + if (jump->flags & IS_COND) { + saved_inst = inst[0]; + inst[0] = inst[1] ^ (1 << 28); + inst[1] = saved_inst; + } else { + inst[1] = inst[0]; + inst[0] = BICC | DA(0x8); + } + jump->addr = (sljit_uw)inst; + return inst + 1; + } + } + + diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + if (diff <= MAX_DISP && diff >= MIN_DISP) { + jump->flags |= PATCH_B; + if (jump->flags & IS_COND) + inst[0] ^= (1 << 28); + else + inst[0] = BICC | DA(0x8); + inst[1] = NOP; + jump->addr = (sljit_uw)inst; + return inst + 1; + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins*)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 2); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + if (jump && jump->addr == word_count) { +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + jump->addr = (sljit_uw)(code_ptr - 3); +#else + jump->addr = (sljit_uw)(code_ptr - 6); +#endif + code_ptr = detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw)code_ptr; + const_ = const_->next; + } + code_ptr ++; + word_count ++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_si)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins*)jump->addr; + + if (jump->flags & PATCH_CALL) { + addr = (sljit_sw)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); + buf_ptr[0] = CALL | (addr & 0x3fffffff); + break; + } + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - jump->addr) >> 2; + SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); + buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); + break; + } + + /* Set the fields of immediate loads. */ +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff); + buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff); +#else +#error "Implementation required" +#endif + } while (0); + jump = jump->next; + } + + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + SLJIT_CACHE_FLUSH(code, code_ptr); + return code; +} + +/* --------------------------------------------------------------------- */ +/* Entry, exit */ +/* --------------------------------------------------------------------- */ + +/* Creates an index in data_transfer_insts array. */ +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +/* Separates integer and floating point registers */ +#define GPR_REG 0x0f +#define DOUBLE_DATA 0x10 +#define SINGLE_DATA 0x12 + +#define MEM_MASK 0x1f + +#define WRITE_BACK 0x00020 +#define ARG_TEST 0x00040 +#define ALT_KEEP_CACHE 0x00080 +#define CUMULATIVE_OP 0x00100 +#define IMM_OP 0x00200 +#define SRC2_IMM 0x00400 + +#define REG_DEST 0x00800 +#define REG2_SOURCE 0x01000 +#define SLOW_SRC1 0x02000 +#define SLOW_SRC2 0x04000 +#define SLOW_DEST 0x08000 + +/* SET_FLAGS (0x10 << 19) also belong here! */ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#include "sljitNativeSPARC_32.c" +#else +#include "sljitNativeSPARC_64.c" +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; + compiler->local_size = local_size; + + if (local_size <= SIMM_MAX) { + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS)); + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, -local_size)); + FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS)); + } + + /* Arguments are in their appropriate registers. */ + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + if (op != SLJIT_MOV || !FAST_IS_REG(src)) { + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + src = SLJIT_R0; + } + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(src) | S2(0), UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) +#define ARCH_32_64(a, b) a +#else +#define ARCH_32_64(a, b) b +#endif + +static SLJIT_CONST sljit_ins data_transfer_insts[16 + 4] = { +/* u w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), +/* u w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* u b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* u b l */ OPC1(3) | OPC3(0x01) /* ldub */, +/* u h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* u h l */ OPC1(3) | OPC3(0x02) /* lduh */, +/* u i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* u i l */ OPC1(3) | OPC3(0x00) /* lduw */, + +/* s w s */ ARCH_32_64(OPC1(3) | OPC3(0x04) /* stw */, OPC1(3) | OPC3(0x0e) /* stx */), +/* s w l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x0b) /* ldx */), +/* s b s */ OPC1(3) | OPC3(0x05) /* stb */, +/* s b l */ OPC1(3) | OPC3(0x09) /* ldsb */, +/* s h s */ OPC1(3) | OPC3(0x06) /* sth */, +/* s h l */ OPC1(3) | OPC3(0x0a) /* ldsh */, +/* s i s */ OPC1(3) | OPC3(0x04) /* stw */, +/* s i l */ ARCH_32_64(OPC1(3) | OPC3(0x00) /* lduw */, OPC1(3) | OPC3(0x08) /* ldsw */), + +/* d s */ OPC1(3) | OPC3(0x27), +/* d l */ OPC1(3) | OPC3(0x23), +/* s s */ OPC1(3) | OPC3(0x24), +/* s l */ OPC1(3) | OPC3(0x20), +}; + +#undef ARCH_32_64 + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if (!(flags & WRITE_BACK) || !(arg & REG_MASK)) { + if ((!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) + || ((arg & OFFS_REG_MASK) && (argw & 0x3) == 0)) { + /* Works for both absoulte and relative addresses (immediate case). */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] + | ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)) + | S1(arg & REG_MASK) | ((arg & OFFS_REG_MASK) ? S2(OFFS_REG(arg)) : IMM(argw)), + ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS)); + return -1; + } + } + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + SLJIT_ASSERT(argw); + next_argw &= 0x3; + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == next_argw) + return 1; + return 0; + } + + if (((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN)) + return 1; + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si base, arg2, delay_slot; + sljit_ins dest; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + base = arg & REG_MASK; + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + SLJIT_ASSERT(argw != 0); + + /* Using the cache. */ + if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw)) + arg2 = TMP_REG3; + else { + if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base && reg != OFFS_REG(arg)) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2))); + } + } + else { + /* Using the cache. */ + if ((compiler->cache_arg == SLJIT_MEM) && (argw - compiler->cache_argw) <= SIMM_MAX && (argw - compiler->cache_argw) >= SIMM_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | S1(TMP_REG3) | IMM(argw - compiler->cache_argw), DR(TMP_REG3))); + compiler->cache_argw = argw; + } + arg2 = TMP_REG3; + } else { + if ((next_argw - argw) <= SIMM_MAX && (next_argw - argw) >= SIMM_MIN) { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + arg2 = TMP_REG3; + } + else if ((flags & LOAD_DATA) && ((flags & MEM_MASK) <= GPR_REG) && reg != base) + arg2 = reg; + else /* It must be a mov operation, so tmp1 must be free to use. */ + arg2 = TMP_REG1; + FAIL_IF(load_immediate(compiler, arg2, argw)); + } + } + + dest = ((flags & MEM_MASK) <= GPR_REG ? D(reg) : DA(reg)); + delay_slot = ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) ? DR(reg) : MOVABLE_INS; + if (!base) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(arg2) | IMM(0), delay_slot); + if (!(flags & WRITE_BACK)) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot); + FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot)); + return push_inst(compiler, ADD | D(base) | S1(base) | S2(arg2), DR(base)); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg, arg, argw)) + return compiler->error; + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg + arg2 goes to TMP_REG2, imm or src reg + TMP_REG3 can be used for caching + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_si dst_r = TMP_REG2; + sljit_si src1_r; + sljit_sw src2_r = 0; + sljit_si sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + } + else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if (src2w <= SIMM_MAX && src2w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if (src1w <= SIMM_MAX && src1w >= SIMM_MIN) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_r = src1; + else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_r = TMP_REG1; + } + else + src1_r = 0; + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } + else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w)); + src2_r = sugg_src2_r; + } + else { + src2_r = 0; + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + dst_r = 0; + } + } + } + else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, dst_r, dst, dstw); + return compiler->error; + } + return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_BREAKPOINT: + return push_inst(compiler, TA, UNMOVABLE_INS); + case SLJIT_NOP: + return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_LUMUL: + case SLJIT_LSMUL: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + FAIL_IF(push_inst(compiler, (op == SLJIT_LUMUL ? UMUL : SMUL) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + return push_inst(compiler, RDY | D(SLJIT_R1), DR(SLJIT_R1)); +#else +#error "Implementation required" +#endif + case SLJIT_LUDIV: + case SLJIT_LSDIV: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (op == SLJIT_LUDIV) + FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); + else { + FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); + } + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); + FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1))); + return SLJIT_SUCCESS; +#else +#error "Implementation required" +#endif + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_MOVU: + case SLJIT_MOVU_P: + return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: + return emit_op(compiler, SLJIT_MOV_UI, flags | INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV_SI, flags | INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, flags | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub)srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, flags | BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb)srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, flags | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh)srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, flags | HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh)srcw : srcw); + + case SLJIT_NOT: + case SLJIT_CLZ: + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si flags = GET_FLAGS(op) ? SET_FLAGS : 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_ADD: + case SLJIT_ADDC: + case SLJIT_MUL: + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + if (src2 & SLJIT_IMM) + src2w &= 0x1f; +#else + SLJIT_ASSERT_STOP(); +#endif + return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg << 1; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif +} + +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 7)) +#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double) +#define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + src = TMP_FREG1; + } + else + src <<= 1; + + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS)); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) { + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); + } + + /* Store the integer value from a VFP register. */ + return emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, dst, dstw, 0, 0); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + srcw = 0; + } + + if (FAST_IS_REG(src)) { + FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); + src = SLJIT_MEM1(SLJIT_SP); + srcw = FLOAT_TMP_MEM_OFFSET; + } + + FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw)); + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FITOS, FITOD) | DA(dst_r) | S2A(TMP_FREG1), MOVABLE_INS)); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + src1 = TMP_FREG1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); + src2 = TMP_FREG2; + } + else + src2 <<= 1; + + return push_inst(compiler, SELECT_FOP(op, FCMPS, FCMPD) | S1A(src1) | S2A(src2), FCC_IS_SET | MOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + + CHECK_ERROR(); + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) + op ^= SLJIT_SINGLE_OP; + + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG1; + + if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw)); + src = dst_r; + } + else + src <<= 1; + + switch (GET_OPCODE(op)) { + case SLJIT_DMOV: + if (src != dst_r) { + if (dst_r != TMP_FREG1) { + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (!(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + } + else + dst_r = src; + } + break; + case SLJIT_DNEG: + FAIL_IF(push_inst(compiler, FNEGS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_DABS: + FAIL_IF(push_inst(compiler, FABSS | DA(dst_r) | S2A(src), MOVABLE_INS)); + if (dst_r != src && !(op & SLJIT_SINGLE_OP)) + FAIL_IF(push_inst(compiler, FMOVS | DA(dst_r | 1) | S2A(src | 1), MOVABLE_INS)); + break; + case SLJIT_CONVD_FROMS: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | DA(dst_r) | S2A(src), MOVABLE_INS)); + op ^= SLJIT_SINGLE_OP; + break; + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0)); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r, flags = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + dst_r = FAST_IS_REG(dst) ? (dst << 1) : TMP_FREG2; + + if (src1 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) { + FAIL_IF(compiler->error); + src1 = TMP_FREG1; + } else + flags |= SLOW_SRC1; + } + else + src1 <<= 1; + + if (src2 & SLJIT_MEM) { + if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) { + FAIL_IF(compiler->error); + src2 = TMP_FREG2; + } else + flags |= SLOW_SRC2; + } + else + src2 <<= 1; + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + } + else { + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + } + } + else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw)); + + if (flags & SLOW_SRC1) + src1 = TMP_FREG1; + if (flags & SLOW_SRC2) + src2 = TMP_FREG2; + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADDD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_DSUB: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUBD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_DMUL: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMULD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + + case SLJIT_DDIV: + FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIVD) | DA(dst_r) | S1A(src1) | S2A(src2), MOVABLE_INS)); + break; + } + + if (dst_r == TMP_FREG2) + FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0)); + + return SLJIT_SUCCESS; +} + +#undef FLOAT_DATA +#undef SELECT_FOP + +/* --------------------------------------------------------------------- */ +/* Other instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst)); + + /* Memory. */ + return emit_op_mem(compiler, WORD_DATA, TMP_LINK, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(push_inst(compiler, OR | D(TMP_LINK) | S1(0) | S2(src), DR(TMP_LINK))); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_LINK, src, srcw)); + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, TMP_LINK, srcw)); + + FAIL_IF(push_inst(compiler, JMPL | D(0) | S1(TMP_LINK) | IMM(8), UNMOVABLE_INS)); + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + compiler->delay_slot = UNMOVABLE_INS; + return label; +} + +static sljit_ins get_cc(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_MUL_NOT_OVERFLOW: + case SLJIT_D_NOT_EQUAL: /* Unordered. */ + return DA(0x1); + + case SLJIT_NOT_EQUAL: + case SLJIT_MUL_OVERFLOW: + case SLJIT_D_EQUAL: + return DA(0x9); + + case SLJIT_LESS: + case SLJIT_D_GREATER: /* Unordered. */ + return DA(0x5); + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_LESS_EQUAL: + return DA(0xd); + + case SLJIT_GREATER: + case SLJIT_D_GREATER_EQUAL: /* Unordered. */ + return DA(0xc); + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS: + return DA(0x4); + + case SLJIT_SIG_LESS: + return DA(0x3); + + case SLJIT_SIG_GREATER_EQUAL: + return DA(0xb); + + case SLJIT_SIG_GREATER: + return DA(0xa); + + case SLJIT_SIG_LESS_EQUAL: + return DA(0x2); + + case SLJIT_OVERFLOW: + case SLJIT_D_UNORDERED: + return DA(0x7); + + case SLJIT_NOT_OVERFLOW: + case SLJIT_D_ORDERED: + return DA(0xf); + + default: + SLJIT_ASSERT_STOP(); + return DA(0x8); + } +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type < SLJIT_D_EQUAL) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & ICC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, BICC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } + else if (type < SLJIT_JUMP) { + jump->flags |= IS_COND; + if (((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) && !(compiler->delay_slot & FCC_IS_SET)) + jump->flags |= IS_MOVABLE; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + PTR_FAIL_IF(push_inst(compiler, FBFCC | get_cc(type ^ 1) | 5, UNMOVABLE_INS)); +#else +#error "Implementation required" +#endif + } else { + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_CALL; + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(TMP_REG2) | IMM(0), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + struct sljit_jump *jump = NULL; + sljit_si src_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + src_r = src; + else if (src & SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; + if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS) + jump->flags |= IS_MOVABLE; + if (type >= SLJIT_FAST_CALL) + jump->flags |= IS_CALL; + + FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + src_r = TMP_REG2; + } + else { + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); + src_r = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, JMPL | D(type >= SLJIT_FAST_CALL ? TMP_LINK : 0) | S1(src_r) | IMM(0), UNMOVABLE_INS)); + if (jump) + jump->addr = compiler->size; + return push_inst(compiler, NOP, UNMOVABLE_INS); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_si reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + op = GET_OPCODE(op); + reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } + + type &= 0xff; + if (type < SLJIT_D_EQUAL) + FAIL_IF(push_inst(compiler, BICC | get_cc(type) | 3, UNMOVABLE_INS)); + else + FAIL_IF(push_inst(compiler, FBFCC | get_cc(type) | 3, UNMOVABLE_INS)); + + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); + + if (op >= SLJIT_ADD) + return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + + return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; +#else +#error "Implementation required" +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + sljit_si reg; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const(compiler, reg, init_value)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + return const_; +} diff --git a/src/sljit/sljitNativeTILEGX-encoder.c b/src/sljit/sljitNativeTILEGX-encoder.c new file mode 100644 index 0000000..7196329 --- /dev/null +++ b/src/sljit/sljitNativeTILEGX-encoder.c @@ -0,0 +1,10159 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This code is owned by Tilera Corporation, and distributed as part + of multiple projects. In sljit, the code is under BSD licence. */ + +#include +#include +#include +#define BFD_RELOC(x) R_##x + +/* Special registers. */ +#define TREG_LR 55 +#define TREG_SN 56 +#define TREG_ZERO 63 + +/* Canonical name of each register. */ +const char *const tilegx_register_names[] = +{ + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", + "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", + "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "tp", "sp", "lr", + "sn", "idn0", "idn1", "udn0", "udn1", "udn2", "udn3", "zero" +}; + +enum +{ + R_NONE = 0, + R_TILEGX_NONE = 0, + R_TILEGX_64 = 1, + R_TILEGX_32 = 2, + R_TILEGX_16 = 3, + R_TILEGX_8 = 4, + R_TILEGX_64_PCREL = 5, + R_TILEGX_32_PCREL = 6, + R_TILEGX_16_PCREL = 7, + R_TILEGX_8_PCREL = 8, + R_TILEGX_HW0 = 9, + R_TILEGX_HW1 = 10, + R_TILEGX_HW2 = 11, + R_TILEGX_HW3 = 12, + R_TILEGX_HW0_LAST = 13, + R_TILEGX_HW1_LAST = 14, + R_TILEGX_HW2_LAST = 15, + R_TILEGX_COPY = 16, + R_TILEGX_GLOB_DAT = 17, + R_TILEGX_JMP_SLOT = 18, + R_TILEGX_RELATIVE = 19, + R_TILEGX_BROFF_X1 = 20, + R_TILEGX_JUMPOFF_X1 = 21, + R_TILEGX_JUMPOFF_X1_PLT = 22, + R_TILEGX_IMM8_X0 = 23, + R_TILEGX_IMM8_Y0 = 24, + R_TILEGX_IMM8_X1 = 25, + R_TILEGX_IMM8_Y1 = 26, + R_TILEGX_DEST_IMM8_X1 = 27, + R_TILEGX_MT_IMM14_X1 = 28, + R_TILEGX_MF_IMM14_X1 = 29, + R_TILEGX_MMSTART_X0 = 30, + R_TILEGX_MMEND_X0 = 31, + R_TILEGX_SHAMT_X0 = 32, + R_TILEGX_SHAMT_X1 = 33, + R_TILEGX_SHAMT_Y0 = 34, + R_TILEGX_SHAMT_Y1 = 35, + R_TILEGX_IMM16_X0_HW0 = 36, + R_TILEGX_IMM16_X1_HW0 = 37, + R_TILEGX_IMM16_X0_HW1 = 38, + R_TILEGX_IMM16_X1_HW1 = 39, + R_TILEGX_IMM16_X0_HW2 = 40, + R_TILEGX_IMM16_X1_HW2 = 41, + R_TILEGX_IMM16_X0_HW3 = 42, + R_TILEGX_IMM16_X1_HW3 = 43, + R_TILEGX_IMM16_X0_HW0_LAST = 44, + R_TILEGX_IMM16_X1_HW0_LAST = 45, + R_TILEGX_IMM16_X0_HW1_LAST = 46, + R_TILEGX_IMM16_X1_HW1_LAST = 47, + R_TILEGX_IMM16_X0_HW2_LAST = 48, + R_TILEGX_IMM16_X1_HW2_LAST = 49, + R_TILEGX_IMM16_X0_HW0_PCREL = 50, + R_TILEGX_IMM16_X1_HW0_PCREL = 51, + R_TILEGX_IMM16_X0_HW1_PCREL = 52, + R_TILEGX_IMM16_X1_HW1_PCREL = 53, + R_TILEGX_IMM16_X0_HW2_PCREL = 54, + R_TILEGX_IMM16_X1_HW2_PCREL = 55, + R_TILEGX_IMM16_X0_HW3_PCREL = 56, + R_TILEGX_IMM16_X1_HW3_PCREL = 57, + R_TILEGX_IMM16_X0_HW0_LAST_PCREL = 58, + R_TILEGX_IMM16_X1_HW0_LAST_PCREL = 59, + R_TILEGX_IMM16_X0_HW1_LAST_PCREL = 60, + R_TILEGX_IMM16_X1_HW1_LAST_PCREL = 61, + R_TILEGX_IMM16_X0_HW2_LAST_PCREL = 62, + R_TILEGX_IMM16_X1_HW2_LAST_PCREL = 63, + R_TILEGX_IMM16_X0_HW0_GOT = 64, + R_TILEGX_IMM16_X1_HW0_GOT = 65, + + R_TILEGX_IMM16_X0_HW0_PLT_PCREL = 66, + R_TILEGX_IMM16_X1_HW0_PLT_PCREL = 67, + R_TILEGX_IMM16_X0_HW1_PLT_PCREL = 68, + R_TILEGX_IMM16_X1_HW1_PLT_PCREL = 69, + R_TILEGX_IMM16_X0_HW2_PLT_PCREL = 70, + R_TILEGX_IMM16_X1_HW2_PLT_PCREL = 71, + + R_TILEGX_IMM16_X0_HW0_LAST_GOT = 72, + R_TILEGX_IMM16_X1_HW0_LAST_GOT = 73, + R_TILEGX_IMM16_X0_HW1_LAST_GOT = 74, + R_TILEGX_IMM16_X1_HW1_LAST_GOT = 75, + R_TILEGX_IMM16_X0_HW0_TLS_GD = 78, + R_TILEGX_IMM16_X1_HW0_TLS_GD = 79, + R_TILEGX_IMM16_X0_HW0_TLS_LE = 80, + R_TILEGX_IMM16_X1_HW0_TLS_LE = 81, + R_TILEGX_IMM16_X0_HW0_LAST_TLS_LE = 82, + R_TILEGX_IMM16_X1_HW0_LAST_TLS_LE = 83, + R_TILEGX_IMM16_X0_HW1_LAST_TLS_LE = 84, + R_TILEGX_IMM16_X1_HW1_LAST_TLS_LE = 85, + R_TILEGX_IMM16_X0_HW0_LAST_TLS_GD = 86, + R_TILEGX_IMM16_X1_HW0_LAST_TLS_GD = 87, + R_TILEGX_IMM16_X0_HW1_LAST_TLS_GD = 88, + R_TILEGX_IMM16_X1_HW1_LAST_TLS_GD = 89, + R_TILEGX_IMM16_X0_HW0_TLS_IE = 92, + R_TILEGX_IMM16_X1_HW0_TLS_IE = 93, + + R_TILEGX_IMM16_X0_HW0_LAST_PLT_PCREL = 94, + R_TILEGX_IMM16_X1_HW0_LAST_PLT_PCREL = 95, + R_TILEGX_IMM16_X0_HW1_LAST_PLT_PCREL = 96, + R_TILEGX_IMM16_X1_HW1_LAST_PLT_PCREL = 97, + R_TILEGX_IMM16_X0_HW2_LAST_PLT_PCREL = 98, + R_TILEGX_IMM16_X1_HW2_LAST_PLT_PCREL = 99, + + R_TILEGX_IMM16_X0_HW0_LAST_TLS_IE = 100, + R_TILEGX_IMM16_X1_HW0_LAST_TLS_IE = 101, + R_TILEGX_IMM16_X0_HW1_LAST_TLS_IE = 102, + R_TILEGX_IMM16_X1_HW1_LAST_TLS_IE = 103, + R_TILEGX_TLS_DTPMOD64 = 106, + R_TILEGX_TLS_DTPOFF64 = 107, + R_TILEGX_TLS_TPOFF64 = 108, + R_TILEGX_TLS_DTPMOD32 = 109, + R_TILEGX_TLS_DTPOFF32 = 110, + R_TILEGX_TLS_TPOFF32 = 111, + R_TILEGX_TLS_GD_CALL = 112, + R_TILEGX_IMM8_X0_TLS_GD_ADD = 113, + R_TILEGX_IMM8_X1_TLS_GD_ADD = 114, + R_TILEGX_IMM8_Y0_TLS_GD_ADD = 115, + R_TILEGX_IMM8_Y1_TLS_GD_ADD = 116, + R_TILEGX_TLS_IE_LOAD = 117, + R_TILEGX_IMM8_X0_TLS_ADD = 118, + R_TILEGX_IMM8_X1_TLS_ADD = 119, + R_TILEGX_IMM8_Y0_TLS_ADD = 120, + R_TILEGX_IMM8_Y1_TLS_ADD = 121, + R_TILEGX_GNU_VTINHERIT = 128, + R_TILEGX_GNU_VTENTRY = 129, + R_TILEGX_IRELATIVE = 130, + R_TILEGX_NUM = 131 +}; + +typedef enum +{ + TILEGX_PIPELINE_X0, + TILEGX_PIPELINE_X1, + TILEGX_PIPELINE_Y0, + TILEGX_PIPELINE_Y1, + TILEGX_PIPELINE_Y2, +} tilegx_pipeline; + +typedef unsigned long long tilegx_bundle_bits; + +/* These are the bits that determine if a bundle is in the X encoding. */ +#define TILEGX_BUNDLE_MODE_MASK ((tilegx_bundle_bits)3 << 62) + +enum +{ + /* Maximum number of instructions in a bundle (2 for X, 3 for Y). */ + TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE = 3, + + /* How many different pipeline encodings are there? X0, X1, Y0, Y1, Y2. */ + TILEGX_NUM_PIPELINE_ENCODINGS = 5, + + /* Log base 2 of TILEGX_BUNDLE_SIZE_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES = 3, + + /* Instructions take this many bytes. */ + TILEGX_BUNDLE_SIZE_IN_BYTES = 1 << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES, + + /* Log base 2 of TILEGX_BUNDLE_ALIGNMENT_IN_BYTES. */ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES = 3, + + /* Bundles should be aligned modulo this number of bytes. */ + TILEGX_BUNDLE_ALIGNMENT_IN_BYTES = + (1 << TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES), + + /* Number of registers (some are magic, such as network I/O). */ + TILEGX_NUM_REGISTERS = 64, +}; + +/* Make a few "tile_" variables to simplify common code between + architectures. */ + +typedef tilegx_bundle_bits tile_bundle_bits; +#define TILE_BUNDLE_SIZE_IN_BYTES TILEGX_BUNDLE_SIZE_IN_BYTES +#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES +#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \ + TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES + +/* 64-bit pattern for a { bpt ; nop } bundle. */ +#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL + +typedef enum +{ + TILEGX_OP_TYPE_REGISTER, + TILEGX_OP_TYPE_IMMEDIATE, + TILEGX_OP_TYPE_ADDRESS, + TILEGX_OP_TYPE_SPR +} tilegx_operand_type; + +struct tilegx_operand +{ + /* Is this operand a register, immediate or address? */ + tilegx_operand_type type; + + /* The default relocation type for this operand. */ + signed int default_reloc : 16; + + /* How many bits is this value? (used for range checking) */ + unsigned int num_bits : 5; + + /* Is the value signed? (used for range checking) */ + unsigned int is_signed : 1; + + /* Is this operand a source register? */ + unsigned int is_src_reg : 1; + + /* Is this operand written? (i.e. is it a destination register) */ + unsigned int is_dest_reg : 1; + + /* Is this operand PC-relative? */ + unsigned int is_pc_relative : 1; + + /* By how many bits do we right shift the value before inserting? */ + unsigned int rightshift : 2; + + /* Return the bits for this operand to be ORed into an existing bundle. */ + tilegx_bundle_bits (*insert) (int op); + + /* Extract this operand and return it. */ + unsigned int (*extract) (tilegx_bundle_bits bundle); +}; + +typedef enum +{ + TILEGX_OPC_BPT, + TILEGX_OPC_INFO, + TILEGX_OPC_INFOL, + TILEGX_OPC_LD4S_TLS, + TILEGX_OPC_LD_TLS, + TILEGX_OPC_MOVE, + TILEGX_OPC_MOVEI, + TILEGX_OPC_MOVELI, + TILEGX_OPC_PREFETCH, + TILEGX_OPC_PREFETCH_ADD_L1, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + TILEGX_OPC_PREFETCH_ADD_L2, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_L1, + TILEGX_OPC_PREFETCH_L1_FAULT, + TILEGX_OPC_PREFETCH_L2, + TILEGX_OPC_PREFETCH_L2_FAULT, + TILEGX_OPC_PREFETCH_L3, + TILEGX_OPC_PREFETCH_L3_FAULT, + TILEGX_OPC_RAISE, + TILEGX_OPC_ADD, + TILEGX_OPC_ADDI, + TILEGX_OPC_ADDLI, + TILEGX_OPC_ADDX, + TILEGX_OPC_ADDXI, + TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXSC, + TILEGX_OPC_AND, + TILEGX_OPC_ANDI, + TILEGX_OPC_BEQZ, + TILEGX_OPC_BEQZT, + TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFINS, + TILEGX_OPC_BGEZ, + TILEGX_OPC_BGEZT, + TILEGX_OPC_BGTZ, + TILEGX_OPC_BGTZT, + TILEGX_OPC_BLBC, + TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBS, + TILEGX_OPC_BLBST, + TILEGX_OPC_BLEZ, + TILEGX_OPC_BLEZT, + TILEGX_OPC_BLTZ, + TILEGX_OPC_BLTZT, + TILEGX_OPC_BNEZ, + TILEGX_OPC_BNEZT, + TILEGX_OPC_CLZ, + TILEGX_OPC_CMOVEQZ, + TILEGX_OPC_CMOVNEZ, + TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPEXCH4, + TILEGX_OPC_CMPLES, + TILEGX_OPC_CMPLEU, + TILEGX_OPC_CMPLTS, + TILEGX_OPC_CMPLTSI, + TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPLTUI, + TILEGX_OPC_CMPNE, + TILEGX_OPC_CMUL, + TILEGX_OPC_CMULA, + TILEGX_OPC_CMULAF, + TILEGX_OPC_CMULF, + TILEGX_OPC_CMULFR, + TILEGX_OPC_CMULH, + TILEGX_OPC_CMULHR, + TILEGX_OPC_CRC32_32, + TILEGX_OPC_CRC32_8, + TILEGX_OPC_CTZ, + TILEGX_OPC_DBLALIGN, + TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + TILEGX_OPC_DRAIN, + TILEGX_OPC_DTLBPR, + TILEGX_OPC_EXCH, + TILEGX_OPC_EXCH4, + TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_ADDSUB, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, + TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, + TILEGX_OPC_FDOUBLE_SUB_FLAGS, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, + TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ, + TILEGX_OPC_FETCHADDGEZ4, + TILEGX_OPC_FETCHAND, + TILEGX_OPC_FETCHAND4, + TILEGX_OPC_FETCHOR, + TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FINV, + TILEGX_OPC_FLUSH, + TILEGX_OPC_FLUSHWB, + TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_ADD1, + TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, + TILEGX_OPC_FSINGLE_MUL2, + TILEGX_OPC_FSINGLE_PACK1, + TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, + TILEGX_OPC_ICOH, + TILEGX_OPC_ILL, + TILEGX_OPC_INV, + TILEGX_OPC_IRET, + TILEGX_OPC_J, + TILEGX_OPC_JAL, + TILEGX_OPC_JALR, + TILEGX_OPC_JALRP, + TILEGX_OPC_JR, + TILEGX_OPC_JRP, + TILEGX_OPC_LD, + TILEGX_OPC_LD1S, + TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_LD1U, + TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_LD2S, + TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_LD2U, + TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_LD4S, + TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_LD4U, + TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_LD_ADD, + TILEGX_OPC_LDNA, + TILEGX_OPC_LDNA_ADD, + TILEGX_OPC_LDNT, + TILEGX_OPC_LDNT1S, + TILEGX_OPC_LDNT1S_ADD, + TILEGX_OPC_LDNT1U, + TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S, + TILEGX_OPC_LDNT2S_ADD, + TILEGX_OPC_LDNT2U, + TILEGX_OPC_LDNT2U_ADD, + TILEGX_OPC_LDNT4S, + TILEGX_OPC_LDNT4S_ADD, + TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + TILEGX_OPC_LNK, + TILEGX_OPC_MF, + TILEGX_OPC_MFSPR, + TILEGX_OPC_MM, + TILEGX_OPC_MNZ, + TILEGX_OPC_MTSPR, + TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, + TILEGX_OPC_MUL_HS_LS, + TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, + TILEGX_OPC_MUL_HU_LS, + TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LS_LU, + TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MULA_HS_HS, + TILEGX_OPC_MULA_HS_HU, + TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, + TILEGX_OPC_MULA_HU_HU, + TILEGX_OPC_MULA_HU_LS, + TILEGX_OPC_MULA_HU_LU, + TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, + TILEGX_OPC_MULAX, + TILEGX_OPC_MULX, + TILEGX_OPC_MZ, + TILEGX_OPC_NAP, + TILEGX_OPC_NOP, + TILEGX_OPC_NOR, + TILEGX_OPC_OR, + TILEGX_OPC_ORI, + TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + TILEGX_OPC_REVBYTES, + TILEGX_OPC_ROTL, + TILEGX_OPC_ROTLI, + TILEGX_OPC_SHL, + TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADDX, + TILEGX_OPC_SHLI, + TILEGX_OPC_SHLX, + TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRS, + TILEGX_OPC_SHRSI, + TILEGX_OPC_SHRU, + TILEGX_OPC_SHRUI, + TILEGX_OPC_SHRUX, + TILEGX_OPC_SHRUXI, + TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_ST, + TILEGX_OPC_ST1, + TILEGX_OPC_ST1_ADD, + TILEGX_OPC_ST2, + TILEGX_OPC_ST2_ADD, + TILEGX_OPC_ST4, + TILEGX_OPC_ST4_ADD, + TILEGX_OPC_ST_ADD, + TILEGX_OPC_STNT, + TILEGX_OPC_STNT1, + TILEGX_OPC_STNT1_ADD, + TILEGX_OPC_STNT2, + TILEGX_OPC_STNT2_ADD, + TILEGX_OPC_STNT4, + TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + TILEGX_OPC_SUB, + TILEGX_OPC_SUBX, + TILEGX_OPC_SUBXSC, + TILEGX_OPC_SWINT0, + TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, + TILEGX_OPC_SWINT3, + TILEGX_OPC_TBLIDXB0, + TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, + TILEGX_OPC_TBLIDXB3, + TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADDI, + TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADIFFU, + TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTUI, + TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPU, + TILEGX_OPC_V1DDOTPUA, + TILEGX_OPC_V1DDOTPUS, + TILEGX_OPC_V1DDOTPUSA, + TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1DOTPA, + TILEGX_OPC_V1DOTPU, + TILEGX_OPC_V1DOTPUA, + TILEGX_OPC_V1DOTPUS, + TILEGX_OPC_V1DOTPUSA, + TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MAXUI, + TILEGX_OPC_V1MINU, + TILEGX_OPC_V1MINUI, + TILEGX_OPC_V1MNZ, + TILEGX_OPC_V1MULTU, + TILEGX_OPC_V1MULU, + TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SADAU, + TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, + TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRS, + TILEGX_OPC_V1SHRSI, + TILEGX_OPC_V1SHRU, + TILEGX_OPC_V1SHRUI, + TILEGX_OPC_V1SUB, + TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V2ADD, + TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADIFFS, + TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPEQI, + TILEGX_OPC_V2CMPLES, + TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTSI, + TILEGX_OPC_V2CMPLTU, + TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTP, + TILEGX_OPC_V2DOTPA, + TILEGX_OPC_V2INT_H, + TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, + TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINS, + TILEGX_OPC_V2MINSI, + TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, + TILEGX_OPC_V2MULS, + TILEGX_OPC_V2MULTS, + TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, + TILEGX_OPC_V2PACKL, + TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, + TILEGX_OPC_V2SADAU, + TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, + TILEGX_OPC_V2SHL, + TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHLSC, + TILEGX_OPC_V2SHRS, + TILEGX_OPC_V2SHRSI, + TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SHRUI, + TILEGX_OPC_V2SUB, + TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V4ADD, + TILEGX_OPC_V4ADDSC, + TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, + TILEGX_OPC_V4PACKSC, + TILEGX_OPC_V4SHL, + TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHRS, + TILEGX_OPC_V4SHRU, + TILEGX_OPC_V4SUB, + TILEGX_OPC_V4SUBSC, + TILEGX_OPC_WH64, + TILEGX_OPC_XOR, + TILEGX_OPC_XORI, + TILEGX_OPC_NONE +} tilegx_mnemonic; + +enum +{ + TILEGX_MAX_OPERANDS = 4 /* bfexts */ +}; + +struct tilegx_opcode +{ + /* The opcode mnemonic, e.g. "add" */ + const char *name; + + /* The enum value for this mnemonic. */ + tilegx_mnemonic mnemonic; + + /* A bit mask of which of the five pipes this instruction + is compatible with: + X0 0x01 + X1 0x02 + Y0 0x04 + Y1 0x08 + Y2 0x10 */ + unsigned char pipes; + + /* How many operands are there? */ + unsigned char num_operands; + + /* Which register does this write implicitly, or TREG_ZERO if none? */ + unsigned char implicitly_written_register; + + /* Can this be bundled with other instructions (almost always true). */ + unsigned char can_bundle; + + /* The description of the operands. Each of these is an + * index into the tilegx_operands[] table. */ + unsigned char operands[TILEGX_NUM_PIPELINE_ENCODINGS][TILEGX_MAX_OPERANDS]; + + /* A mask of which bits have predefined values for each pipeline. + * This is useful for disassembly. */ + tilegx_bundle_bits fixed_bit_masks[TILEGX_NUM_PIPELINE_ENCODINGS]; + + /* For each bit set in fixed_bit_masks, what the value is for this + * instruction. */ + tilegx_bundle_bits fixed_bit_values[TILEGX_NUM_PIPELINE_ENCODINGS]; +}; + +/* Used for non-textual disassembly into structs. */ +struct tilegx_decoded_instruction +{ + const struct tilegx_opcode *opcode; + const struct tilegx_operand *operands[TILEGX_MAX_OPERANDS]; + long long operand_values[TILEGX_MAX_OPERANDS]; +}; + +enum +{ + ADDI_IMM8_OPCODE_X0 = 1, + ADDI_IMM8_OPCODE_X1 = 1, + ADDI_OPCODE_Y0 = 0, + ADDI_OPCODE_Y1 = 1, + ADDLI_OPCODE_X0 = 1, + ADDLI_OPCODE_X1 = 0, + ADDXI_IMM8_OPCODE_X0 = 2, + ADDXI_IMM8_OPCODE_X1 = 2, + ADDXI_OPCODE_Y0 = 1, + ADDXI_OPCODE_Y1 = 2, + ADDXLI_OPCODE_X0 = 2, + ADDXLI_OPCODE_X1 = 1, + ADDXSC_RRR_0_OPCODE_X0 = 1, + ADDXSC_RRR_0_OPCODE_X1 = 1, + ADDX_RRR_0_OPCODE_X0 = 2, + ADDX_RRR_0_OPCODE_X1 = 2, + ADDX_RRR_0_OPCODE_Y0 = 0, + ADDX_SPECIAL_0_OPCODE_Y1 = 0, + ADD_RRR_0_OPCODE_X0 = 3, + ADD_RRR_0_OPCODE_X1 = 3, + ADD_RRR_0_OPCODE_Y0 = 1, + ADD_SPECIAL_0_OPCODE_Y1 = 1, + ANDI_IMM8_OPCODE_X0 = 3, + ANDI_IMM8_OPCODE_X1 = 3, + ANDI_OPCODE_Y0 = 2, + ANDI_OPCODE_Y1 = 3, + AND_RRR_0_OPCODE_X0 = 4, + AND_RRR_0_OPCODE_X1 = 4, + AND_RRR_5_OPCODE_Y0 = 0, + AND_RRR_5_OPCODE_Y1 = 0, + BEQZT_BRANCH_OPCODE_X1 = 16, + BEQZ_BRANCH_OPCODE_X1 = 17, + BFEXTS_BF_OPCODE_X0 = 4, + BFEXTU_BF_OPCODE_X0 = 5, + BFINS_BF_OPCODE_X0 = 6, + BF_OPCODE_X0 = 3, + BGEZT_BRANCH_OPCODE_X1 = 18, + BGEZ_BRANCH_OPCODE_X1 = 19, + BGTZT_BRANCH_OPCODE_X1 = 20, + BGTZ_BRANCH_OPCODE_X1 = 21, + BLBCT_BRANCH_OPCODE_X1 = 22, + BLBC_BRANCH_OPCODE_X1 = 23, + BLBST_BRANCH_OPCODE_X1 = 24, + BLBS_BRANCH_OPCODE_X1 = 25, + BLEZT_BRANCH_OPCODE_X1 = 26, + BLEZ_BRANCH_OPCODE_X1 = 27, + BLTZT_BRANCH_OPCODE_X1 = 28, + BLTZ_BRANCH_OPCODE_X1 = 29, + BNEZT_BRANCH_OPCODE_X1 = 30, + BNEZ_BRANCH_OPCODE_X1 = 31, + BRANCH_OPCODE_X1 = 2, + CMOVEQZ_RRR_0_OPCODE_X0 = 5, + CMOVEQZ_RRR_4_OPCODE_Y0 = 0, + CMOVNEZ_RRR_0_OPCODE_X0 = 6, + CMOVNEZ_RRR_4_OPCODE_Y0 = 1, + CMPEQI_IMM8_OPCODE_X0 = 4, + CMPEQI_IMM8_OPCODE_X1 = 4, + CMPEQI_OPCODE_Y0 = 3, + CMPEQI_OPCODE_Y1 = 4, + CMPEQ_RRR_0_OPCODE_X0 = 7, + CMPEQ_RRR_0_OPCODE_X1 = 5, + CMPEQ_RRR_3_OPCODE_Y0 = 0, + CMPEQ_RRR_3_OPCODE_Y1 = 2, + CMPEXCH4_RRR_0_OPCODE_X1 = 6, + CMPEXCH_RRR_0_OPCODE_X1 = 7, + CMPLES_RRR_0_OPCODE_X0 = 8, + CMPLES_RRR_0_OPCODE_X1 = 8, + CMPLES_RRR_2_OPCODE_Y0 = 0, + CMPLES_RRR_2_OPCODE_Y1 = 0, + CMPLEU_RRR_0_OPCODE_X0 = 9, + CMPLEU_RRR_0_OPCODE_X1 = 9, + CMPLEU_RRR_2_OPCODE_Y0 = 1, + CMPLEU_RRR_2_OPCODE_Y1 = 1, + CMPLTSI_IMM8_OPCODE_X0 = 5, + CMPLTSI_IMM8_OPCODE_X1 = 5, + CMPLTSI_OPCODE_Y0 = 4, + CMPLTSI_OPCODE_Y1 = 5, + CMPLTS_RRR_0_OPCODE_X0 = 10, + CMPLTS_RRR_0_OPCODE_X1 = 10, + CMPLTS_RRR_2_OPCODE_Y0 = 2, + CMPLTS_RRR_2_OPCODE_Y1 = 2, + CMPLTUI_IMM8_OPCODE_X0 = 6, + CMPLTUI_IMM8_OPCODE_X1 = 6, + CMPLTU_RRR_0_OPCODE_X0 = 11, + CMPLTU_RRR_0_OPCODE_X1 = 11, + CMPLTU_RRR_2_OPCODE_Y0 = 3, + CMPLTU_RRR_2_OPCODE_Y1 = 3, + CMPNE_RRR_0_OPCODE_X0 = 12, + CMPNE_RRR_0_OPCODE_X1 = 12, + CMPNE_RRR_3_OPCODE_Y0 = 1, + CMPNE_RRR_3_OPCODE_Y1 = 3, + CMULAF_RRR_0_OPCODE_X0 = 13, + CMULA_RRR_0_OPCODE_X0 = 14, + CMULFR_RRR_0_OPCODE_X0 = 15, + CMULF_RRR_0_OPCODE_X0 = 16, + CMULHR_RRR_0_OPCODE_X0 = 17, + CMULH_RRR_0_OPCODE_X0 = 18, + CMUL_RRR_0_OPCODE_X0 = 19, + CNTLZ_UNARY_OPCODE_X0 = 1, + CNTLZ_UNARY_OPCODE_Y0 = 1, + CNTTZ_UNARY_OPCODE_X0 = 2, + CNTTZ_UNARY_OPCODE_Y0 = 2, + CRC32_32_RRR_0_OPCODE_X0 = 20, + CRC32_8_RRR_0_OPCODE_X0 = 21, + DBLALIGN2_RRR_0_OPCODE_X0 = 22, + DBLALIGN2_RRR_0_OPCODE_X1 = 13, + DBLALIGN4_RRR_0_OPCODE_X0 = 23, + DBLALIGN4_RRR_0_OPCODE_X1 = 14, + DBLALIGN6_RRR_0_OPCODE_X0 = 24, + DBLALIGN6_RRR_0_OPCODE_X1 = 15, + DBLALIGN_RRR_0_OPCODE_X0 = 25, + DRAIN_UNARY_OPCODE_X1 = 1, + DTLBPR_UNARY_OPCODE_X1 = 2, + EXCH4_RRR_0_OPCODE_X1 = 16, + EXCH_RRR_0_OPCODE_X1 = 17, + FDOUBLE_ADDSUB_RRR_0_OPCODE_X0 = 26, + FDOUBLE_ADD_FLAGS_RRR_0_OPCODE_X0 = 27, + FDOUBLE_MUL_FLAGS_RRR_0_OPCODE_X0 = 28, + FDOUBLE_PACK1_RRR_0_OPCODE_X0 = 29, + FDOUBLE_PACK2_RRR_0_OPCODE_X0 = 30, + FDOUBLE_SUB_FLAGS_RRR_0_OPCODE_X0 = 31, + FDOUBLE_UNPACK_MAX_RRR_0_OPCODE_X0 = 32, + FDOUBLE_UNPACK_MIN_RRR_0_OPCODE_X0 = 33, + FETCHADD4_RRR_0_OPCODE_X1 = 18, + FETCHADDGEZ4_RRR_0_OPCODE_X1 = 19, + FETCHADDGEZ_RRR_0_OPCODE_X1 = 20, + FETCHADD_RRR_0_OPCODE_X1 = 21, + FETCHAND4_RRR_0_OPCODE_X1 = 22, + FETCHAND_RRR_0_OPCODE_X1 = 23, + FETCHOR4_RRR_0_OPCODE_X1 = 24, + FETCHOR_RRR_0_OPCODE_X1 = 25, + FINV_UNARY_OPCODE_X1 = 3, + FLUSHWB_UNARY_OPCODE_X1 = 4, + FLUSH_UNARY_OPCODE_X1 = 5, + FNOP_UNARY_OPCODE_X0 = 3, + FNOP_UNARY_OPCODE_X1 = 6, + FNOP_UNARY_OPCODE_Y0 = 3, + FNOP_UNARY_OPCODE_Y1 = 8, + FSINGLE_ADD1_RRR_0_OPCODE_X0 = 34, + FSINGLE_ADDSUB2_RRR_0_OPCODE_X0 = 35, + FSINGLE_MUL1_RRR_0_OPCODE_X0 = 36, + FSINGLE_MUL2_RRR_0_OPCODE_X0 = 37, + FSINGLE_PACK1_UNARY_OPCODE_X0 = 4, + FSINGLE_PACK1_UNARY_OPCODE_Y0 = 4, + FSINGLE_PACK2_RRR_0_OPCODE_X0 = 38, + FSINGLE_SUB1_RRR_0_OPCODE_X0 = 39, + ICOH_UNARY_OPCODE_X1 = 7, + ILL_UNARY_OPCODE_X1 = 8, + ILL_UNARY_OPCODE_Y1 = 9, + IMM8_OPCODE_X0 = 4, + IMM8_OPCODE_X1 = 3, + INV_UNARY_OPCODE_X1 = 9, + IRET_UNARY_OPCODE_X1 = 10, + JALRP_UNARY_OPCODE_X1 = 11, + JALRP_UNARY_OPCODE_Y1 = 10, + JALR_UNARY_OPCODE_X1 = 12, + JALR_UNARY_OPCODE_Y1 = 11, + JAL_JUMP_OPCODE_X1 = 0, + JRP_UNARY_OPCODE_X1 = 13, + JRP_UNARY_OPCODE_Y1 = 12, + JR_UNARY_OPCODE_X1 = 14, + JR_UNARY_OPCODE_Y1 = 13, + JUMP_OPCODE_X1 = 4, + J_JUMP_OPCODE_X1 = 1, + LD1S_ADD_IMM8_OPCODE_X1 = 7, + LD1S_OPCODE_Y2 = 0, + LD1S_UNARY_OPCODE_X1 = 15, + LD1U_ADD_IMM8_OPCODE_X1 = 8, + LD1U_OPCODE_Y2 = 1, + LD1U_UNARY_OPCODE_X1 = 16, + LD2S_ADD_IMM8_OPCODE_X1 = 9, + LD2S_OPCODE_Y2 = 2, + LD2S_UNARY_OPCODE_X1 = 17, + LD2U_ADD_IMM8_OPCODE_X1 = 10, + LD2U_OPCODE_Y2 = 3, + LD2U_UNARY_OPCODE_X1 = 18, + LD4S_ADD_IMM8_OPCODE_X1 = 11, + LD4S_OPCODE_Y2 = 1, + LD4S_UNARY_OPCODE_X1 = 19, + LD4U_ADD_IMM8_OPCODE_X1 = 12, + LD4U_OPCODE_Y2 = 2, + LD4U_UNARY_OPCODE_X1 = 20, + LDNA_UNARY_OPCODE_X1 = 21, + LDNT1S_ADD_IMM8_OPCODE_X1 = 13, + LDNT1S_UNARY_OPCODE_X1 = 22, + LDNT1U_ADD_IMM8_OPCODE_X1 = 14, + LDNT1U_UNARY_OPCODE_X1 = 23, + LDNT2S_ADD_IMM8_OPCODE_X1 = 15, + LDNT2S_UNARY_OPCODE_X1 = 24, + LDNT2U_ADD_IMM8_OPCODE_X1 = 16, + LDNT2U_UNARY_OPCODE_X1 = 25, + LDNT4S_ADD_IMM8_OPCODE_X1 = 17, + LDNT4S_UNARY_OPCODE_X1 = 26, + LDNT4U_ADD_IMM8_OPCODE_X1 = 18, + LDNT4U_UNARY_OPCODE_X1 = 27, + LDNT_ADD_IMM8_OPCODE_X1 = 19, + LDNT_UNARY_OPCODE_X1 = 28, + LD_ADD_IMM8_OPCODE_X1 = 20, + LD_OPCODE_Y2 = 3, + LD_UNARY_OPCODE_X1 = 29, + LNK_UNARY_OPCODE_X1 = 30, + LNK_UNARY_OPCODE_Y1 = 14, + LWNA_ADD_IMM8_OPCODE_X1 = 21, + MFSPR_IMM8_OPCODE_X1 = 22, + MF_UNARY_OPCODE_X1 = 31, + MM_BF_OPCODE_X0 = 7, + MNZ_RRR_0_OPCODE_X0 = 40, + MNZ_RRR_0_OPCODE_X1 = 26, + MNZ_RRR_4_OPCODE_Y0 = 2, + MNZ_RRR_4_OPCODE_Y1 = 2, + MODE_OPCODE_YA2 = 1, + MODE_OPCODE_YB2 = 2, + MODE_OPCODE_YC2 = 3, + MTSPR_IMM8_OPCODE_X1 = 23, + MULAX_RRR_0_OPCODE_X0 = 41, + MULAX_RRR_3_OPCODE_Y0 = 2, + MULA_HS_HS_RRR_0_OPCODE_X0 = 42, + MULA_HS_HS_RRR_9_OPCODE_Y0 = 0, + MULA_HS_HU_RRR_0_OPCODE_X0 = 43, + MULA_HS_LS_RRR_0_OPCODE_X0 = 44, + MULA_HS_LU_RRR_0_OPCODE_X0 = 45, + MULA_HU_HU_RRR_0_OPCODE_X0 = 46, + MULA_HU_HU_RRR_9_OPCODE_Y0 = 1, + MULA_HU_LS_RRR_0_OPCODE_X0 = 47, + MULA_HU_LU_RRR_0_OPCODE_X0 = 48, + MULA_LS_LS_RRR_0_OPCODE_X0 = 49, + MULA_LS_LS_RRR_9_OPCODE_Y0 = 2, + MULA_LS_LU_RRR_0_OPCODE_X0 = 50, + MULA_LU_LU_RRR_0_OPCODE_X0 = 51, + MULA_LU_LU_RRR_9_OPCODE_Y0 = 3, + MULX_RRR_0_OPCODE_X0 = 52, + MULX_RRR_3_OPCODE_Y0 = 3, + MUL_HS_HS_RRR_0_OPCODE_X0 = 53, + MUL_HS_HS_RRR_8_OPCODE_Y0 = 0, + MUL_HS_HU_RRR_0_OPCODE_X0 = 54, + MUL_HS_LS_RRR_0_OPCODE_X0 = 55, + MUL_HS_LU_RRR_0_OPCODE_X0 = 56, + MUL_HU_HU_RRR_0_OPCODE_X0 = 57, + MUL_HU_HU_RRR_8_OPCODE_Y0 = 1, + MUL_HU_LS_RRR_0_OPCODE_X0 = 58, + MUL_HU_LU_RRR_0_OPCODE_X0 = 59, + MUL_LS_LS_RRR_0_OPCODE_X0 = 60, + MUL_LS_LS_RRR_8_OPCODE_Y0 = 2, + MUL_LS_LU_RRR_0_OPCODE_X0 = 61, + MUL_LU_LU_RRR_0_OPCODE_X0 = 62, + MUL_LU_LU_RRR_8_OPCODE_Y0 = 3, + MZ_RRR_0_OPCODE_X0 = 63, + MZ_RRR_0_OPCODE_X1 = 27, + MZ_RRR_4_OPCODE_Y0 = 3, + MZ_RRR_4_OPCODE_Y1 = 3, + NAP_UNARY_OPCODE_X1 = 32, + NOP_UNARY_OPCODE_X0 = 5, + NOP_UNARY_OPCODE_X1 = 33, + NOP_UNARY_OPCODE_Y0 = 5, + NOP_UNARY_OPCODE_Y1 = 15, + NOR_RRR_0_OPCODE_X0 = 64, + NOR_RRR_0_OPCODE_X1 = 28, + NOR_RRR_5_OPCODE_Y0 = 1, + NOR_RRR_5_OPCODE_Y1 = 1, + ORI_IMM8_OPCODE_X0 = 7, + ORI_IMM8_OPCODE_X1 = 24, + OR_RRR_0_OPCODE_X0 = 65, + OR_RRR_0_OPCODE_X1 = 29, + OR_RRR_5_OPCODE_Y0 = 2, + OR_RRR_5_OPCODE_Y1 = 2, + PCNT_UNARY_OPCODE_X0 = 6, + PCNT_UNARY_OPCODE_Y0 = 6, + REVBITS_UNARY_OPCODE_X0 = 7, + REVBITS_UNARY_OPCODE_Y0 = 7, + REVBYTES_UNARY_OPCODE_X0 = 8, + REVBYTES_UNARY_OPCODE_Y0 = 8, + ROTLI_SHIFT_OPCODE_X0 = 1, + ROTLI_SHIFT_OPCODE_X1 = 1, + ROTLI_SHIFT_OPCODE_Y0 = 0, + ROTLI_SHIFT_OPCODE_Y1 = 0, + ROTL_RRR_0_OPCODE_X0 = 66, + ROTL_RRR_0_OPCODE_X1 = 30, + ROTL_RRR_6_OPCODE_Y0 = 0, + ROTL_RRR_6_OPCODE_Y1 = 0, + RRR_0_OPCODE_X0 = 5, + RRR_0_OPCODE_X1 = 5, + RRR_0_OPCODE_Y0 = 5, + RRR_0_OPCODE_Y1 = 6, + RRR_1_OPCODE_Y0 = 6, + RRR_1_OPCODE_Y1 = 7, + RRR_2_OPCODE_Y0 = 7, + RRR_2_OPCODE_Y1 = 8, + RRR_3_OPCODE_Y0 = 8, + RRR_3_OPCODE_Y1 = 9, + RRR_4_OPCODE_Y0 = 9, + RRR_4_OPCODE_Y1 = 10, + RRR_5_OPCODE_Y0 = 10, + RRR_5_OPCODE_Y1 = 11, + RRR_6_OPCODE_Y0 = 11, + RRR_6_OPCODE_Y1 = 12, + RRR_7_OPCODE_Y0 = 12, + RRR_7_OPCODE_Y1 = 13, + RRR_8_OPCODE_Y0 = 13, + RRR_9_OPCODE_Y0 = 14, + SHIFT_OPCODE_X0 = 6, + SHIFT_OPCODE_X1 = 6, + SHIFT_OPCODE_Y0 = 15, + SHIFT_OPCODE_Y1 = 14, + SHL16INSLI_OPCODE_X0 = 7, + SHL16INSLI_OPCODE_X1 = 7, + SHL1ADDX_RRR_0_OPCODE_X0 = 67, + SHL1ADDX_RRR_0_OPCODE_X1 = 31, + SHL1ADDX_RRR_7_OPCODE_Y0 = 1, + SHL1ADDX_RRR_7_OPCODE_Y1 = 1, + SHL1ADD_RRR_0_OPCODE_X0 = 68, + SHL1ADD_RRR_0_OPCODE_X1 = 32, + SHL1ADD_RRR_1_OPCODE_Y0 = 0, + SHL1ADD_RRR_1_OPCODE_Y1 = 0, + SHL2ADDX_RRR_0_OPCODE_X0 = 69, + SHL2ADDX_RRR_0_OPCODE_X1 = 33, + SHL2ADDX_RRR_7_OPCODE_Y0 = 2, + SHL2ADDX_RRR_7_OPCODE_Y1 = 2, + SHL2ADD_RRR_0_OPCODE_X0 = 70, + SHL2ADD_RRR_0_OPCODE_X1 = 34, + SHL2ADD_RRR_1_OPCODE_Y0 = 1, + SHL2ADD_RRR_1_OPCODE_Y1 = 1, + SHL3ADDX_RRR_0_OPCODE_X0 = 71, + SHL3ADDX_RRR_0_OPCODE_X1 = 35, + SHL3ADDX_RRR_7_OPCODE_Y0 = 3, + SHL3ADDX_RRR_7_OPCODE_Y1 = 3, + SHL3ADD_RRR_0_OPCODE_X0 = 72, + SHL3ADD_RRR_0_OPCODE_X1 = 36, + SHL3ADD_RRR_1_OPCODE_Y0 = 2, + SHL3ADD_RRR_1_OPCODE_Y1 = 2, + SHLI_SHIFT_OPCODE_X0 = 2, + SHLI_SHIFT_OPCODE_X1 = 2, + SHLI_SHIFT_OPCODE_Y0 = 1, + SHLI_SHIFT_OPCODE_Y1 = 1, + SHLXI_SHIFT_OPCODE_X0 = 3, + SHLXI_SHIFT_OPCODE_X1 = 3, + SHLX_RRR_0_OPCODE_X0 = 73, + SHLX_RRR_0_OPCODE_X1 = 37, + SHL_RRR_0_OPCODE_X0 = 74, + SHL_RRR_0_OPCODE_X1 = 38, + SHL_RRR_6_OPCODE_Y0 = 1, + SHL_RRR_6_OPCODE_Y1 = 1, + SHRSI_SHIFT_OPCODE_X0 = 4, + SHRSI_SHIFT_OPCODE_X1 = 4, + SHRSI_SHIFT_OPCODE_Y0 = 2, + SHRSI_SHIFT_OPCODE_Y1 = 2, + SHRS_RRR_0_OPCODE_X0 = 75, + SHRS_RRR_0_OPCODE_X1 = 39, + SHRS_RRR_6_OPCODE_Y0 = 2, + SHRS_RRR_6_OPCODE_Y1 = 2, + SHRUI_SHIFT_OPCODE_X0 = 5, + SHRUI_SHIFT_OPCODE_X1 = 5, + SHRUI_SHIFT_OPCODE_Y0 = 3, + SHRUI_SHIFT_OPCODE_Y1 = 3, + SHRUXI_SHIFT_OPCODE_X0 = 6, + SHRUXI_SHIFT_OPCODE_X1 = 6, + SHRUX_RRR_0_OPCODE_X0 = 76, + SHRUX_RRR_0_OPCODE_X1 = 40, + SHRU_RRR_0_OPCODE_X0 = 77, + SHRU_RRR_0_OPCODE_X1 = 41, + SHRU_RRR_6_OPCODE_Y0 = 3, + SHRU_RRR_6_OPCODE_Y1 = 3, + SHUFFLEBYTES_RRR_0_OPCODE_X0 = 78, + ST1_ADD_IMM8_OPCODE_X1 = 25, + ST1_OPCODE_Y2 = 0, + ST1_RRR_0_OPCODE_X1 = 42, + ST2_ADD_IMM8_OPCODE_X1 = 26, + ST2_OPCODE_Y2 = 1, + ST2_RRR_0_OPCODE_X1 = 43, + ST4_ADD_IMM8_OPCODE_X1 = 27, + ST4_OPCODE_Y2 = 2, + ST4_RRR_0_OPCODE_X1 = 44, + STNT1_ADD_IMM8_OPCODE_X1 = 28, + STNT1_RRR_0_OPCODE_X1 = 45, + STNT2_ADD_IMM8_OPCODE_X1 = 29, + STNT2_RRR_0_OPCODE_X1 = 46, + STNT4_ADD_IMM8_OPCODE_X1 = 30, + STNT4_RRR_0_OPCODE_X1 = 47, + STNT_ADD_IMM8_OPCODE_X1 = 31, + STNT_RRR_0_OPCODE_X1 = 48, + ST_ADD_IMM8_OPCODE_X1 = 32, + ST_OPCODE_Y2 = 3, + ST_RRR_0_OPCODE_X1 = 49, + SUBXSC_RRR_0_OPCODE_X0 = 79, + SUBXSC_RRR_0_OPCODE_X1 = 50, + SUBX_RRR_0_OPCODE_X0 = 80, + SUBX_RRR_0_OPCODE_X1 = 51, + SUBX_RRR_0_OPCODE_Y0 = 2, + SUBX_RRR_0_OPCODE_Y1 = 2, + SUB_RRR_0_OPCODE_X0 = 81, + SUB_RRR_0_OPCODE_X1 = 52, + SUB_RRR_0_OPCODE_Y0 = 3, + SUB_RRR_0_OPCODE_Y1 = 3, + SWINT0_UNARY_OPCODE_X1 = 34, + SWINT1_UNARY_OPCODE_X1 = 35, + SWINT2_UNARY_OPCODE_X1 = 36, + SWINT3_UNARY_OPCODE_X1 = 37, + TBLIDXB0_UNARY_OPCODE_X0 = 9, + TBLIDXB0_UNARY_OPCODE_Y0 = 9, + TBLIDXB1_UNARY_OPCODE_X0 = 10, + TBLIDXB1_UNARY_OPCODE_Y0 = 10, + TBLIDXB2_UNARY_OPCODE_X0 = 11, + TBLIDXB2_UNARY_OPCODE_Y0 = 11, + TBLIDXB3_UNARY_OPCODE_X0 = 12, + TBLIDXB3_UNARY_OPCODE_Y0 = 12, + UNARY_RRR_0_OPCODE_X0 = 82, + UNARY_RRR_0_OPCODE_X1 = 53, + UNARY_RRR_1_OPCODE_Y0 = 3, + UNARY_RRR_1_OPCODE_Y1 = 3, + V1ADDI_IMM8_OPCODE_X0 = 8, + V1ADDI_IMM8_OPCODE_X1 = 33, + V1ADDUC_RRR_0_OPCODE_X0 = 83, + V1ADDUC_RRR_0_OPCODE_X1 = 54, + V1ADD_RRR_0_OPCODE_X0 = 84, + V1ADD_RRR_0_OPCODE_X1 = 55, + V1ADIFFU_RRR_0_OPCODE_X0 = 85, + V1AVGU_RRR_0_OPCODE_X0 = 86, + V1CMPEQI_IMM8_OPCODE_X0 = 9, + V1CMPEQI_IMM8_OPCODE_X1 = 34, + V1CMPEQ_RRR_0_OPCODE_X0 = 87, + V1CMPEQ_RRR_0_OPCODE_X1 = 56, + V1CMPLES_RRR_0_OPCODE_X0 = 88, + V1CMPLES_RRR_0_OPCODE_X1 = 57, + V1CMPLEU_RRR_0_OPCODE_X0 = 89, + V1CMPLEU_RRR_0_OPCODE_X1 = 58, + V1CMPLTSI_IMM8_OPCODE_X0 = 10, + V1CMPLTSI_IMM8_OPCODE_X1 = 35, + V1CMPLTS_RRR_0_OPCODE_X0 = 90, + V1CMPLTS_RRR_0_OPCODE_X1 = 59, + V1CMPLTUI_IMM8_OPCODE_X0 = 11, + V1CMPLTUI_IMM8_OPCODE_X1 = 36, + V1CMPLTU_RRR_0_OPCODE_X0 = 91, + V1CMPLTU_RRR_0_OPCODE_X1 = 60, + V1CMPNE_RRR_0_OPCODE_X0 = 92, + V1CMPNE_RRR_0_OPCODE_X1 = 61, + V1DDOTPUA_RRR_0_OPCODE_X0 = 161, + V1DDOTPUSA_RRR_0_OPCODE_X0 = 93, + V1DDOTPUS_RRR_0_OPCODE_X0 = 94, + V1DDOTPU_RRR_0_OPCODE_X0 = 162, + V1DOTPA_RRR_0_OPCODE_X0 = 95, + V1DOTPUA_RRR_0_OPCODE_X0 = 163, + V1DOTPUSA_RRR_0_OPCODE_X0 = 96, + V1DOTPUS_RRR_0_OPCODE_X0 = 97, + V1DOTPU_RRR_0_OPCODE_X0 = 164, + V1DOTP_RRR_0_OPCODE_X0 = 98, + V1INT_H_RRR_0_OPCODE_X0 = 99, + V1INT_H_RRR_0_OPCODE_X1 = 62, + V1INT_L_RRR_0_OPCODE_X0 = 100, + V1INT_L_RRR_0_OPCODE_X1 = 63, + V1MAXUI_IMM8_OPCODE_X0 = 12, + V1MAXUI_IMM8_OPCODE_X1 = 37, + V1MAXU_RRR_0_OPCODE_X0 = 101, + V1MAXU_RRR_0_OPCODE_X1 = 64, + V1MINUI_IMM8_OPCODE_X0 = 13, + V1MINUI_IMM8_OPCODE_X1 = 38, + V1MINU_RRR_0_OPCODE_X0 = 102, + V1MINU_RRR_0_OPCODE_X1 = 65, + V1MNZ_RRR_0_OPCODE_X0 = 103, + V1MNZ_RRR_0_OPCODE_X1 = 66, + V1MULTU_RRR_0_OPCODE_X0 = 104, + V1MULUS_RRR_0_OPCODE_X0 = 105, + V1MULU_RRR_0_OPCODE_X0 = 106, + V1MZ_RRR_0_OPCODE_X0 = 107, + V1MZ_RRR_0_OPCODE_X1 = 67, + V1SADAU_RRR_0_OPCODE_X0 = 108, + V1SADU_RRR_0_OPCODE_X0 = 109, + V1SHLI_SHIFT_OPCODE_X0 = 7, + V1SHLI_SHIFT_OPCODE_X1 = 7, + V1SHL_RRR_0_OPCODE_X0 = 110, + V1SHL_RRR_0_OPCODE_X1 = 68, + V1SHRSI_SHIFT_OPCODE_X0 = 8, + V1SHRSI_SHIFT_OPCODE_X1 = 8, + V1SHRS_RRR_0_OPCODE_X0 = 111, + V1SHRS_RRR_0_OPCODE_X1 = 69, + V1SHRUI_SHIFT_OPCODE_X0 = 9, + V1SHRUI_SHIFT_OPCODE_X1 = 9, + V1SHRU_RRR_0_OPCODE_X0 = 112, + V1SHRU_RRR_0_OPCODE_X1 = 70, + V1SUBUC_RRR_0_OPCODE_X0 = 113, + V1SUBUC_RRR_0_OPCODE_X1 = 71, + V1SUB_RRR_0_OPCODE_X0 = 114, + V1SUB_RRR_0_OPCODE_X1 = 72, + V2ADDI_IMM8_OPCODE_X0 = 14, + V2ADDI_IMM8_OPCODE_X1 = 39, + V2ADDSC_RRR_0_OPCODE_X0 = 115, + V2ADDSC_RRR_0_OPCODE_X1 = 73, + V2ADD_RRR_0_OPCODE_X0 = 116, + V2ADD_RRR_0_OPCODE_X1 = 74, + V2ADIFFS_RRR_0_OPCODE_X0 = 117, + V2AVGS_RRR_0_OPCODE_X0 = 118, + V2CMPEQI_IMM8_OPCODE_X0 = 15, + V2CMPEQI_IMM8_OPCODE_X1 = 40, + V2CMPEQ_RRR_0_OPCODE_X0 = 119, + V2CMPEQ_RRR_0_OPCODE_X1 = 75, + V2CMPLES_RRR_0_OPCODE_X0 = 120, + V2CMPLES_RRR_0_OPCODE_X1 = 76, + V2CMPLEU_RRR_0_OPCODE_X0 = 121, + V2CMPLEU_RRR_0_OPCODE_X1 = 77, + V2CMPLTSI_IMM8_OPCODE_X0 = 16, + V2CMPLTSI_IMM8_OPCODE_X1 = 41, + V2CMPLTS_RRR_0_OPCODE_X0 = 122, + V2CMPLTS_RRR_0_OPCODE_X1 = 78, + V2CMPLTUI_IMM8_OPCODE_X0 = 17, + V2CMPLTUI_IMM8_OPCODE_X1 = 42, + V2CMPLTU_RRR_0_OPCODE_X0 = 123, + V2CMPLTU_RRR_0_OPCODE_X1 = 79, + V2CMPNE_RRR_0_OPCODE_X0 = 124, + V2CMPNE_RRR_0_OPCODE_X1 = 80, + V2DOTPA_RRR_0_OPCODE_X0 = 125, + V2DOTP_RRR_0_OPCODE_X0 = 126, + V2INT_H_RRR_0_OPCODE_X0 = 127, + V2INT_H_RRR_0_OPCODE_X1 = 81, + V2INT_L_RRR_0_OPCODE_X0 = 128, + V2INT_L_RRR_0_OPCODE_X1 = 82, + V2MAXSI_IMM8_OPCODE_X0 = 18, + V2MAXSI_IMM8_OPCODE_X1 = 43, + V2MAXS_RRR_0_OPCODE_X0 = 129, + V2MAXS_RRR_0_OPCODE_X1 = 83, + V2MINSI_IMM8_OPCODE_X0 = 19, + V2MINSI_IMM8_OPCODE_X1 = 44, + V2MINS_RRR_0_OPCODE_X0 = 130, + V2MINS_RRR_0_OPCODE_X1 = 84, + V2MNZ_RRR_0_OPCODE_X0 = 131, + V2MNZ_RRR_0_OPCODE_X1 = 85, + V2MULFSC_RRR_0_OPCODE_X0 = 132, + V2MULS_RRR_0_OPCODE_X0 = 133, + V2MULTS_RRR_0_OPCODE_X0 = 134, + V2MZ_RRR_0_OPCODE_X0 = 135, + V2MZ_RRR_0_OPCODE_X1 = 86, + V2PACKH_RRR_0_OPCODE_X0 = 136, + V2PACKH_RRR_0_OPCODE_X1 = 87, + V2PACKL_RRR_0_OPCODE_X0 = 137, + V2PACKL_RRR_0_OPCODE_X1 = 88, + V2PACKUC_RRR_0_OPCODE_X0 = 138, + V2PACKUC_RRR_0_OPCODE_X1 = 89, + V2SADAS_RRR_0_OPCODE_X0 = 139, + V2SADAU_RRR_0_OPCODE_X0 = 140, + V2SADS_RRR_0_OPCODE_X0 = 141, + V2SADU_RRR_0_OPCODE_X0 = 142, + V2SHLI_SHIFT_OPCODE_X0 = 10, + V2SHLI_SHIFT_OPCODE_X1 = 10, + V2SHLSC_RRR_0_OPCODE_X0 = 143, + V2SHLSC_RRR_0_OPCODE_X1 = 90, + V2SHL_RRR_0_OPCODE_X0 = 144, + V2SHL_RRR_0_OPCODE_X1 = 91, + V2SHRSI_SHIFT_OPCODE_X0 = 11, + V2SHRSI_SHIFT_OPCODE_X1 = 11, + V2SHRS_RRR_0_OPCODE_X0 = 145, + V2SHRS_RRR_0_OPCODE_X1 = 92, + V2SHRUI_SHIFT_OPCODE_X0 = 12, + V2SHRUI_SHIFT_OPCODE_X1 = 12, + V2SHRU_RRR_0_OPCODE_X0 = 146, + V2SHRU_RRR_0_OPCODE_X1 = 93, + V2SUBSC_RRR_0_OPCODE_X0 = 147, + V2SUBSC_RRR_0_OPCODE_X1 = 94, + V2SUB_RRR_0_OPCODE_X0 = 148, + V2SUB_RRR_0_OPCODE_X1 = 95, + V4ADDSC_RRR_0_OPCODE_X0 = 149, + V4ADDSC_RRR_0_OPCODE_X1 = 96, + V4ADD_RRR_0_OPCODE_X0 = 150, + V4ADD_RRR_0_OPCODE_X1 = 97, + V4INT_H_RRR_0_OPCODE_X0 = 151, + V4INT_H_RRR_0_OPCODE_X1 = 98, + V4INT_L_RRR_0_OPCODE_X0 = 152, + V4INT_L_RRR_0_OPCODE_X1 = 99, + V4PACKSC_RRR_0_OPCODE_X0 = 153, + V4PACKSC_RRR_0_OPCODE_X1 = 100, + V4SHLSC_RRR_0_OPCODE_X0 = 154, + V4SHLSC_RRR_0_OPCODE_X1 = 101, + V4SHL_RRR_0_OPCODE_X0 = 155, + V4SHL_RRR_0_OPCODE_X1 = 102, + V4SHRS_RRR_0_OPCODE_X0 = 156, + V4SHRS_RRR_0_OPCODE_X1 = 103, + V4SHRU_RRR_0_OPCODE_X0 = 157, + V4SHRU_RRR_0_OPCODE_X1 = 104, + V4SUBSC_RRR_0_OPCODE_X0 = 158, + V4SUBSC_RRR_0_OPCODE_X1 = 105, + V4SUB_RRR_0_OPCODE_X0 = 159, + V4SUB_RRR_0_OPCODE_X1 = 106, + WH64_UNARY_OPCODE_X1 = 38, + XORI_IMM8_OPCODE_X0 = 20, + XORI_IMM8_OPCODE_X1 = 45, + XOR_RRR_0_OPCODE_X0 = 160, + XOR_RRR_0_OPCODE_X1 = 107, + XOR_RRR_5_OPCODE_Y0 = 3, + XOR_RRR_5_OPCODE_Y1 = 3 +}; + +static __inline unsigned int +get_BFEnd_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_BFOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 24)) & 0xf); +} + +static __inline unsigned int +get_BFStart_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3f); +} + +static __inline unsigned int +get_BrOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x0001ffc0); +} + +static __inline unsigned int +get_BrType_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 54)) & 0x1f); +} + +static __inline unsigned int +get_Dest_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 43)) & 0x000000c0); +} + +static __inline unsigned int +get_Dest_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 0)) & 0x3f); +} + +static __inline unsigned int +get_Dest_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x3f); +} + +static __inline unsigned int +get_Imm16_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xffff); +} + +static __inline unsigned int +get_Imm16_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xffff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0xff); +} + +static __inline unsigned int +get_Imm8OpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0xff); +} + +static __inline unsigned int +get_Imm8_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0xff); +} + +static __inline unsigned int +get_JumpOff_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x7ffffff); +} + +static __inline unsigned int +get_JumpOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0x1); +} + +static __inline unsigned int +get_MF_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3fff); +} + +static __inline unsigned int +get_MT_Imm14_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 31)) & 0x0000003f) | + (((unsigned int)(n >> 37)) & 0x00003fc0); +} + +static __inline unsigned int +get_Mode(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 62)) & 0x3); +} + +static __inline unsigned int +get_Opcode_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 28)) & 0x7); +} + +static __inline unsigned int +get_Opcode_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 59)) & 0x7); +} + +static __inline unsigned int +get_Opcode_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 27)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 58)) & 0xf); +} + +static __inline unsigned int +get_Opcode_Y2(tilegx_bundle_bits n) +{ + return (((n >> 26)) & 0x00000001) | + (((unsigned int)(n >> 56)) & 0x00000002); +} + +static __inline unsigned int +get_RRROpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_RRROpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_ShAmt_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_ShAmt_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3ff); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3ff); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 18)) & 0x3); +} + +static __inline unsigned int +get_ShiftOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 49)) & 0x3); +} + +static __inline unsigned int +get_SrcA_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 6)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 37)) & 0x3f); +} + +static __inline unsigned int +get_SrcA_Y2(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 20)) & 0x3f); +} + +static __inline unsigned int +get_SrcBDest_Y2(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 51)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_SrcB_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_X1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y0(tilegx_bundle_bits num) +{ + const unsigned int n = (unsigned int)num; + return (((n >> 12)) & 0x3f); +} + +static __inline unsigned int +get_UnaryOpcodeExtension_Y1(tilegx_bundle_bits n) +{ + return (((unsigned int)(n >> 43)) & 0x3f); +} + +static __inline int +sign_extend(int n, int num_bits) +{ + int shift = (int)(sizeof(int) * 8 - num_bits); + return (n << shift) >> shift; +} + +static __inline tilegx_bundle_bits +create_BFEnd_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_BFOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 24); +} + +static __inline tilegx_bundle_bits +create_BFStart_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 18); +} + +static __inline tilegx_bundle_bits +create_BrOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x0001ffc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_BrType_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1f)) << 54); +} + +static __inline tilegx_bundle_bits +create_Dest_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x000000c0)) << 43); +} + +static __inline tilegx_bundle_bits +create_Dest_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilegx_bundle_bits +create_Dest_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Dest_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 0); +} + +static __inline tilegx_bundle_bits +create_Dest_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 31); +} + +static __inline tilegx_bundle_bits +create_Imm16_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xffff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm16_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xffff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 20); +} + +static __inline tilegx_bundle_bits +create_Imm8OpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 51); +} + +static __inline tilegx_bundle_bits +create_Imm8_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xff) << 12); +} + +static __inline tilegx_bundle_bits +create_Imm8_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xff)) << 43); +} + +static __inline tilegx_bundle_bits +create_JumpOff_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7ffffff)) << 31); +} + +static __inline tilegx_bundle_bits +create_JumpOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x1)) << 58); +} + +static __inline tilegx_bundle_bits +create_MF_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3fff)) << 37); +} + +static __inline tilegx_bundle_bits +create_MT_Imm14_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x0000003f)) << 31) | + (((tilegx_bundle_bits)(n & 0x00003fc0)) << 37); +} + +static __inline tilegx_bundle_bits +create_Mode(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 62); +} + +static __inline tilegx_bundle_bits +create_Opcode_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x7) << 28); +} + +static __inline tilegx_bundle_bits +create_Opcode_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x7)) << 59); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0xf) << 27); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0xf)) << 58); +} + +static __inline tilegx_bundle_bits +create_Opcode_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x00000001) << 26) | + (((tilegx_bundle_bits)(n & 0x00000002)) << 56); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_RRROpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShAmt_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_ShAmt_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_ShAmt_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3ff) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3ff)) << 49); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3) << 18); +} + +static __inline tilegx_bundle_bits +create_ShiftOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3)) << 49); +} + +static __inline tilegx_bundle_bits +create_SrcA_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 6); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 37); +} + +static __inline tilegx_bundle_bits +create_SrcA_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 20); +} + +static __inline tilegx_bundle_bits +create_SrcBDest_Y2(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 51); +} + +static __inline tilegx_bundle_bits +create_SrcB_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_SrcB_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_X1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y0(int num) +{ + const unsigned int n = (unsigned int)num; + return ((n & 0x3f) << 12); +} + +static __inline tilegx_bundle_bits +create_UnaryOpcodeExtension_Y1(int num) +{ + const unsigned int n = (unsigned int)num; + return (((tilegx_bundle_bits)(n & 0x3f)) << 43); +} + +const struct tilegx_opcode tilegx_opcodes[336] = +{ + { "bpt", TILEGX_OPC_BPT, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffffffff80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a44ae00000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "info", TILEGX_OPC_INFO, 0xf, 1, TREG_ZERO, 1, + { { 0 }, { 1 }, { 2 }, { 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00fffULL, + 0xfff807ff80000000ULL, + 0x0000000078000fffULL, + 0x3c0007ff80000000ULL, + 0ULL + }, + { + 0x0000000040300fffULL, + 0x181807ff80000000ULL, + 0x0000000010000fffULL, + 0x0c0007ff80000000ULL, + -1ULL + } +#endif + }, + { "infol", TILEGX_OPC_INFOL, 0x3, 1, TREG_ZERO, 1, + { { 4 }, { 5 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc000000070000fffULL, + 0xf80007ff80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000070000fffULL, + 0x380007ff80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld4s_tls", TILEGX_OPC_LD4S_TLS, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1858000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld_tls", TILEGX_OPC_LD_TLS, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18a0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "move", TILEGX_OPC_MOVE, 0xf, 2, TREG_ZERO, 1, + { { 8, 9 }, { 6, 7 }, { 10, 11 }, { 12, 13 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0xfffff80000000000ULL, + 0x00000000780ff000ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + 0x000000005107f000ULL, + 0x283bf80000000000ULL, + 0x00000000500bf000ULL, + 0x2c05f80000000000ULL, + -1ULL + } +#endif + }, + { "movei", TILEGX_OPC_MOVEI, 0xf, 2, TREG_ZERO, 1, + { { 8, 0 }, { 6, 1 }, { 10, 2 }, { 12, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00fc0ULL, + 0xfff807e000000000ULL, + 0x0000000078000fc0ULL, + 0x3c0007e000000000ULL, + 0ULL + }, + { + 0x0000000040100fc0ULL, + 0x180807e000000000ULL, + 0x0000000000000fc0ULL, + 0x040007e000000000ULL, + -1ULL + } +#endif + }, + { "moveli", TILEGX_OPC_MOVELI, 0x3, 2, TREG_ZERO, 1, + { { 8, 4 }, { 6, 5 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc000000070000fc0ULL, + 0xf80007e000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000010000fc0ULL, + 0x000007e000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch", TILEGX_OPC_PREFETCH, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a801f80000000ULL, + -1ULL, + -1ULL, + 0x41f8000004000000ULL + } +#endif + }, + { "prefetch_add_l1", TILEGX_OPC_PREFETCH_ADD_L1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1840001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_add_l1_fault", TILEGX_OPC_PREFETCH_ADD_L1_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1838001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_add_l2", TILEGX_OPC_PREFETCH_ADD_L2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1850001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_add_l2_fault", TILEGX_OPC_PREFETCH_ADD_L2_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1848001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_add_l3", TILEGX_OPC_PREFETCH_ADD_L3, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1860001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_add_l3_fault", TILEGX_OPC_PREFETCH_ADD_L3_FAULT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8001f80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1858001f80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "prefetch_l1", TILEGX_OPC_PREFETCH_L1, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a801f80000000ULL, + -1ULL, + -1ULL, + 0x41f8000004000000ULL + } +#endif + }, + { "prefetch_l1_fault", TILEGX_OPC_PREFETCH_L1_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a781f80000000ULL, + -1ULL, + -1ULL, + 0x41f8000000000000ULL + } +#endif + }, + { "prefetch_l2", TILEGX_OPC_PREFETCH_L2, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a901f80000000ULL, + -1ULL, + -1ULL, + 0x43f8000004000000ULL + } +#endif + }, + { "prefetch_l2_fault", TILEGX_OPC_PREFETCH_L2_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a881f80000000ULL, + -1ULL, + -1ULL, + 0x43f8000000000000ULL + } +#endif + }, + { "prefetch_l3", TILEGX_OPC_PREFETCH_L3, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286aa01f80000000ULL, + -1ULL, + -1ULL, + 0x83f8000000000000ULL + } +#endif + }, + { "prefetch_l3_fault", TILEGX_OPC_PREFETCH_L3_FAULT, 0x12, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff81f80000000ULL, + 0ULL, + 0ULL, + 0xc3f8000004000000ULL + }, + { + -1ULL, + 0x286a981f80000000ULL, + -1ULL, + -1ULL, + 0x81f8000004000000ULL + } +#endif + }, + { "raise", TILEGX_OPC_RAISE, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffffffff80000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a44ae80000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "add", TILEGX_OPC_ADD, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000500c0000ULL, + 0x2806000000000000ULL, + 0x0000000028040000ULL, + 0x1802000000000000ULL, + -1ULL + } +#endif + }, + { "addi", TILEGX_OPC_ADDI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0x0000000078000000ULL, + 0x3c00000000000000ULL, + 0ULL + }, + { + 0x0000000040100000ULL, + 0x1808000000000000ULL, + 0ULL, + 0x0400000000000000ULL, + -1ULL + } +#endif + }, + { "addli", TILEGX_OPC_ADDLI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc000000070000000ULL, + 0xf800000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000010000000ULL, + 0ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "addx", TILEGX_OPC_ADDX, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050080000ULL, + 0x2804000000000000ULL, + 0x0000000028000000ULL, + 0x1800000000000000ULL, + -1ULL + } +#endif + }, + { "addxi", TILEGX_OPC_ADDXI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0x0000000078000000ULL, + 0x3c00000000000000ULL, + 0ULL + }, + { + 0x0000000040200000ULL, + 0x1810000000000000ULL, + 0x0000000008000000ULL, + 0x0800000000000000ULL, + -1ULL + } +#endif + }, + { "addxli", TILEGX_OPC_ADDXLI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc000000070000000ULL, + 0xf800000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000020000000ULL, + 0x0800000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "addxsc", TILEGX_OPC_ADDXSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050040000ULL, + 0x2802000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "and", TILEGX_OPC_AND, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050100000ULL, + 0x2808000000000000ULL, + 0x0000000050000000ULL, + 0x2c00000000000000ULL, + -1ULL + } +#endif + }, + { "andi", TILEGX_OPC_ANDI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0x0000000078000000ULL, + 0x3c00000000000000ULL, + 0ULL + }, + { + 0x0000000040300000ULL, + 0x1818000000000000ULL, + 0x0000000010000000ULL, + 0x0c00000000000000ULL, + -1ULL + } +#endif + }, + { "beqz", TILEGX_OPC_BEQZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1440000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "beqzt", TILEGX_OPC_BEQZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1400000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bfexts", TILEGX_OPC_BFEXTS, 0x1, 4, TREG_ZERO, 1, + { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007f000000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000034000000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bfextu", TILEGX_OPC_BFEXTU, 0x1, 4, TREG_ZERO, 1, + { { 8, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007f000000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000035000000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bfins", TILEGX_OPC_BFINS, 0x1, 4, TREG_ZERO, 1, + { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007f000000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000036000000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bgez", TILEGX_OPC_BGEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x14c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bgezt", TILEGX_OPC_BGEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1480000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bgtz", TILEGX_OPC_BGTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1540000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bgtzt", TILEGX_OPC_BGTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1500000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blbc", TILEGX_OPC_BLBC, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x15c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blbct", TILEGX_OPC_BLBCT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1580000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blbs", TILEGX_OPC_BLBS, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1640000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blbst", TILEGX_OPC_BLBST, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1600000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blez", TILEGX_OPC_BLEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x16c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "blezt", TILEGX_OPC_BLEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1680000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bltz", TILEGX_OPC_BLTZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1740000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bltzt", TILEGX_OPC_BLTZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1700000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bnez", TILEGX_OPC_BNEZ, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x17c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "bnezt", TILEGX_OPC_BNEZT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 20 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xffc0000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1780000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "clz", TILEGX_OPC_CLZ, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051481000ULL, + -1ULL, + 0x00000000300c1000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmoveqz", TILEGX_OPC_CMOVEQZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050140000ULL, + -1ULL, + 0x0000000048000000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmovnez", TILEGX_OPC_CMOVNEZ, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050180000ULL, + -1ULL, + 0x0000000048040000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmpeq", TILEGX_OPC_CMPEQ, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000501c0000ULL, + 0x280a000000000000ULL, + 0x0000000040000000ULL, + 0x2404000000000000ULL, + -1ULL + } +#endif + }, + { "cmpeqi", TILEGX_OPC_CMPEQI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0x0000000078000000ULL, + 0x3c00000000000000ULL, + 0ULL + }, + { + 0x0000000040400000ULL, + 0x1820000000000000ULL, + 0x0000000018000000ULL, + 0x1000000000000000ULL, + -1ULL + } +#endif + }, + { "cmpexch", TILEGX_OPC_CMPEXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x280e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmpexch4", TILEGX_OPC_CMPEXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x280c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmples", TILEGX_OPC_CMPLES, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050200000ULL, + 0x2810000000000000ULL, + 0x0000000038000000ULL, + 0x2000000000000000ULL, + -1ULL + } +#endif + }, + { "cmpleu", TILEGX_OPC_CMPLEU, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050240000ULL, + 0x2812000000000000ULL, + 0x0000000038040000ULL, + 0x2002000000000000ULL, + -1ULL + } +#endif + }, + { "cmplts", TILEGX_OPC_CMPLTS, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050280000ULL, + 0x2814000000000000ULL, + 0x0000000038080000ULL, + 0x2004000000000000ULL, + -1ULL + } +#endif + }, + { "cmpltsi", TILEGX_OPC_CMPLTSI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 10, 11, 2 }, { 12, 13, 3 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0x0000000078000000ULL, + 0x3c00000000000000ULL, + 0ULL + }, + { + 0x0000000040500000ULL, + 0x1828000000000000ULL, + 0x0000000020000000ULL, + 0x1400000000000000ULL, + -1ULL + } +#endif + }, + { "cmpltu", TILEGX_OPC_CMPLTU, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000502c0000ULL, + 0x2816000000000000ULL, + 0x00000000380c0000ULL, + 0x2006000000000000ULL, + -1ULL + } +#endif + }, + { "cmpltui", TILEGX_OPC_CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040600000ULL, + 0x1830000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmpne", TILEGX_OPC_CMPNE, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050300000ULL, + 0x2818000000000000ULL, + 0x0000000040040000ULL, + 0x2406000000000000ULL, + -1ULL + } +#endif + }, + { "cmul", TILEGX_OPC_CMUL, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000504c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmula", TILEGX_OPC_CMULA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050380000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmulaf", TILEGX_OPC_CMULAF, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050340000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmulf", TILEGX_OPC_CMULF, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050400000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmulfr", TILEGX_OPC_CMULFR, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000503c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmulh", TILEGX_OPC_CMULH, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050480000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "cmulhr", TILEGX_OPC_CMULHR, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050440000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "crc32_32", TILEGX_OPC_CRC32_32, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050500000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "crc32_8", TILEGX_OPC_CRC32_8, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050540000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ctz", TILEGX_OPC_CTZ, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051482000ULL, + -1ULL, + 0x00000000300c2000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "dblalign", TILEGX_OPC_DBLALIGN, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050640000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "dblalign2", TILEGX_OPC_DBLALIGN2, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050580000ULL, + 0x281a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "dblalign4", TILEGX_OPC_DBLALIGN4, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000505c0000ULL, + 0x281c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "dblalign6", TILEGX_OPC_DBLALIGN6, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050600000ULL, + 0x281e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "drain", TILEGX_OPC_DRAIN, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a080000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "dtlbpr", TILEGX_OPC_DTLBPR, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a100000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "exch", TILEGX_OPC_EXCH, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2822000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "exch4", TILEGX_OPC_EXCH4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2820000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_add_flags", TILEGX_OPC_FDOUBLE_ADD_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000506c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_addsub", TILEGX_OPC_FDOUBLE_ADDSUB, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050680000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_mul_flags", TILEGX_OPC_FDOUBLE_MUL_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050700000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_pack1", TILEGX_OPC_FDOUBLE_PACK1, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050740000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_pack2", TILEGX_OPC_FDOUBLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050780000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_sub_flags", TILEGX_OPC_FDOUBLE_SUB_FLAGS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000507c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_unpack_max", TILEGX_OPC_FDOUBLE_UNPACK_MAX, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050800000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fdouble_unpack_min", TILEGX_OPC_FDOUBLE_UNPACK_MIN, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050840000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchadd", TILEGX_OPC_FETCHADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x282a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchadd4", TILEGX_OPC_FETCHADD4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2824000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchaddgez", TILEGX_OPC_FETCHADDGEZ, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2828000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchaddgez4", TILEGX_OPC_FETCHADDGEZ4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2826000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchand", TILEGX_OPC_FETCHAND, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x282e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchand4", TILEGX_OPC_FETCHAND4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x282c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchor", TILEGX_OPC_FETCHOR, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2832000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fetchor4", TILEGX_OPC_FETCHOR4, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2830000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "finv", TILEGX_OPC_FINV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a180000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "flush", TILEGX_OPC_FLUSH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a280000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "flushwb", TILEGX_OPC_FLUSHWB, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a200000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fnop", TILEGX_OPC_FNOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0xfffff80000000000ULL, + 0x00000000780ff000ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + 0x0000000051483000ULL, + 0x286a300000000000ULL, + 0x00000000300c3000ULL, + 0x1c06400000000000ULL, + -1ULL + } +#endif + }, + { "fsingle_add1", TILEGX_OPC_FSINGLE_ADD1, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050880000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_addsub2", TILEGX_OPC_FSINGLE_ADDSUB2, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000508c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_mul1", TILEGX_OPC_FSINGLE_MUL1, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050900000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_mul2", TILEGX_OPC_FSINGLE_MUL2, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050940000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_pack1", TILEGX_OPC_FSINGLE_PACK1, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051484000ULL, + -1ULL, + 0x00000000300c4000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_pack2", TILEGX_OPC_FSINGLE_PACK2, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050980000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "fsingle_sub1", TILEGX_OPC_FSINGLE_SUB1, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000509c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "icoh", TILEGX_OPC_ICOH, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a380000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ill", TILEGX_OPC_ILL, 0xa, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286a400000000000ULL, + -1ULL, + 0x1c06480000000000ULL, + -1ULL + } +#endif + }, + { "inv", TILEGX_OPC_INV, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a480000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "iret", TILEGX_OPC_IRET, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286a500000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "j", TILEGX_OPC_J, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfc00000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2400000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "jal", TILEGX_OPC_JAL, 0x2, 1, TREG_LR, 1, + { { 0, }, { 25 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfc00000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2000000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "jalr", TILEGX_OPC_JALR, 0xa, 1, TREG_LR, 1, + { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286a600000000000ULL, + -1ULL, + 0x1c06580000000000ULL, + -1ULL + } +#endif + }, + { "jalrp", TILEGX_OPC_JALRP, 0xa, 1, TREG_LR, 1, + { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286a580000000000ULL, + -1ULL, + 0x1c06500000000000ULL, + -1ULL + } +#endif + }, + { "jr", TILEGX_OPC_JR, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286a700000000000ULL, + -1ULL, + 0x1c06680000000000ULL, + -1ULL + } +#endif + }, + { "jrp", TILEGX_OPC_JRP, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 13 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286a680000000000ULL, + -1ULL, + 0x1c06600000000000ULL, + -1ULL + } +#endif + }, + { "ld", TILEGX_OPC_LD, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286ae80000000000ULL, + -1ULL, + -1ULL, + 0x8200000004000000ULL + } +#endif + }, + { "ld1s", TILEGX_OPC_LD1S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286a780000000000ULL, + -1ULL, + -1ULL, + 0x4000000000000000ULL + } +#endif + }, + { "ld1s_add", TILEGX_OPC_LD1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1838000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld1u", TILEGX_OPC_LD1U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286a800000000000ULL, + -1ULL, + -1ULL, + 0x4000000004000000ULL + } +#endif + }, + { "ld1u_add", TILEGX_OPC_LD1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1840000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld2s", TILEGX_OPC_LD2S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286a880000000000ULL, + -1ULL, + -1ULL, + 0x4200000000000000ULL + } +#endif + }, + { "ld2s_add", TILEGX_OPC_LD2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1848000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld2u", TILEGX_OPC_LD2U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286a900000000000ULL, + -1ULL, + -1ULL, + 0x4200000004000000ULL + } +#endif + }, + { "ld2u_add", TILEGX_OPC_LD2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1850000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld4s", TILEGX_OPC_LD4S, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286a980000000000ULL, + -1ULL, + -1ULL, + 0x8000000004000000ULL + } +#endif + }, + { "ld4s_add", TILEGX_OPC_LD4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1858000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld4u", TILEGX_OPC_LD4U, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 26, 14 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x286aa00000000000ULL, + -1ULL, + -1ULL, + 0x8200000000000000ULL + } +#endif + }, + { "ld4u_add", TILEGX_OPC_LD4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1860000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ld_add", TILEGX_OPC_LD_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18a0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldna", TILEGX_OPC_LDNA, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286aa80000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldna_add", TILEGX_OPC_LDNA_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18a8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt", TILEGX_OPC_LDNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ae00000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt1s", TILEGX_OPC_LDNT1S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ab00000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt1s_add", TILEGX_OPC_LDNT1S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1868000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt1u", TILEGX_OPC_LDNT1U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ab80000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt1u_add", TILEGX_OPC_LDNT1U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1870000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt2s", TILEGX_OPC_LDNT2S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ac00000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt2s_add", TILEGX_OPC_LDNT2S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1878000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt2u", TILEGX_OPC_LDNT2U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ac80000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt2u_add", TILEGX_OPC_LDNT2U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1880000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt4s", TILEGX_OPC_LDNT4S, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ad00000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt4s_add", TILEGX_OPC_LDNT4S_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1888000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt4u", TILEGX_OPC_LDNT4U, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286ad80000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt4u_add", TILEGX_OPC_LDNT4U_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1890000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "ldnt_add", TILEGX_OPC_LDNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 6, 15, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1898000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "lnk", TILEGX_OPC_LNK, 0xa, 1, TREG_ZERO, 1, + { { 0, }, { 6 }, { 0, }, { 12 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + -1ULL, + 0x286af00000000000ULL, + -1ULL, + 0x1c06700000000000ULL, + -1ULL + } +#endif + }, + { "mf", TILEGX_OPC_MF, 0x2, 0, TREG_ZERO, 1, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286af80000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mfspr", TILEGX_OPC_MFSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 6, 27 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18b0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mm", TILEGX_OPC_MM, 0x1, 4, TREG_ZERO, 1, + { { 23, 9, 21, 22 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007f000000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000037000000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mnz", TILEGX_OPC_MNZ, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050a00000ULL, + 0x2834000000000000ULL, + 0x0000000048080000ULL, + 0x2804000000000000ULL, + -1ULL + } +#endif + }, + { "mtspr", TILEGX_OPC_MTSPR, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 28, 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18b8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hs_hs", TILEGX_OPC_MUL_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050d40000ULL, + -1ULL, + 0x0000000068000000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hs_hu", TILEGX_OPC_MUL_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050d80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hs_ls", TILEGX_OPC_MUL_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050dc0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hs_lu", TILEGX_OPC_MUL_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050e00000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hu_hu", TILEGX_OPC_MUL_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050e40000ULL, + -1ULL, + 0x0000000068040000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hu_ls", TILEGX_OPC_MUL_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050e80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_hu_lu", TILEGX_OPC_MUL_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050ec0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_ls_ls", TILEGX_OPC_MUL_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050f00000ULL, + -1ULL, + 0x0000000068080000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_ls_lu", TILEGX_OPC_MUL_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050f40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mul_lu_lu", TILEGX_OPC_MUL_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050f80000ULL, + -1ULL, + 0x00000000680c0000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hs_hs", TILEGX_OPC_MULA_HS_HS, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050a80000ULL, + -1ULL, + 0x0000000070000000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hs_hu", TILEGX_OPC_MULA_HS_HU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050ac0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hs_ls", TILEGX_OPC_MULA_HS_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050b00000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hs_lu", TILEGX_OPC_MULA_HS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050b40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hu_hu", TILEGX_OPC_MULA_HU_HU, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050b80000ULL, + -1ULL, + 0x0000000070040000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hu_ls", TILEGX_OPC_MULA_HU_LS, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050bc0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_hu_lu", TILEGX_OPC_MULA_HU_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050c00000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_ls_ls", TILEGX_OPC_MULA_LS_LS, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050c40000ULL, + -1ULL, + 0x0000000070080000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_ls_lu", TILEGX_OPC_MULA_LS_LU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050c80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mula_lu_lu", TILEGX_OPC_MULA_LU_LU, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050cc0000ULL, + -1ULL, + 0x00000000700c0000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mulax", TILEGX_OPC_MULAX, 0x5, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 24, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050a40000ULL, + -1ULL, + 0x0000000040080000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mulx", TILEGX_OPC_MULX, 0x5, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 10, 11, 18 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0x00000000780c0000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000050d00000ULL, + -1ULL, + 0x00000000400c0000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "mz", TILEGX_OPC_MZ, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000050fc0000ULL, + 0x2836000000000000ULL, + 0x00000000480c0000ULL, + 0x2806000000000000ULL, + -1ULL + } +#endif + }, + { "nap", TILEGX_OPC_NAP, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "nop", TILEGX_OPC_NOP, 0xf, 0, TREG_ZERO, 1, + { { }, { }, { }, { }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0xfffff80000000000ULL, + 0x00000000780ff000ULL, + 0x3c07f80000000000ULL, + 0ULL + }, + { + 0x0000000051485000ULL, + 0x286b080000000000ULL, + 0x00000000300c5000ULL, + 0x1c06780000000000ULL, + -1ULL + } +#endif + }, + { "nor", TILEGX_OPC_NOR, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051000000ULL, + 0x2838000000000000ULL, + 0x0000000050040000ULL, + 0x2c02000000000000ULL, + -1ULL + } +#endif + }, + { "or", TILEGX_OPC_OR, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051040000ULL, + 0x283a000000000000ULL, + 0x0000000050080000ULL, + 0x2c04000000000000ULL, + -1ULL + } +#endif + }, + { "ori", TILEGX_OPC_ORI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040700000ULL, + 0x18c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "pcnt", TILEGX_OPC_PCNT, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051486000ULL, + -1ULL, + 0x00000000300c6000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "revbits", TILEGX_OPC_REVBITS, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051487000ULL, + -1ULL, + 0x00000000300c7000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "revbytes", TILEGX_OPC_REVBYTES, 0x5, 2, TREG_ZERO, 1, + { { 8, 9 }, { 0, }, { 10, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051488000ULL, + -1ULL, + 0x00000000300c8000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "rotl", TILEGX_OPC_ROTL, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051080000ULL, + 0x283c000000000000ULL, + 0x0000000058000000ULL, + 0x3000000000000000ULL, + -1ULL + } +#endif + }, + { "rotli", TILEGX_OPC_ROTLI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000060040000ULL, + 0x3002000000000000ULL, + 0x0000000078000000ULL, + 0x3800000000000000ULL, + -1ULL + } +#endif + }, + { "shl", TILEGX_OPC_SHL, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051280000ULL, + 0x284c000000000000ULL, + 0x0000000058040000ULL, + 0x3002000000000000ULL, + -1ULL + } +#endif + }, + { "shl16insli", TILEGX_OPC_SHL16INSLI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 4 }, { 6, 7, 5 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc000000070000000ULL, + 0xf800000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000070000000ULL, + 0x3800000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "shl1add", TILEGX_OPC_SHL1ADD, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051100000ULL, + 0x2840000000000000ULL, + 0x0000000030000000ULL, + 0x1c00000000000000ULL, + -1ULL + } +#endif + }, + { "shl1addx", TILEGX_OPC_SHL1ADDX, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000510c0000ULL, + 0x283e000000000000ULL, + 0x0000000060040000ULL, + 0x3402000000000000ULL, + -1ULL + } +#endif + }, + { "shl2add", TILEGX_OPC_SHL2ADD, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051180000ULL, + 0x2844000000000000ULL, + 0x0000000030040000ULL, + 0x1c02000000000000ULL, + -1ULL + } +#endif + }, + { "shl2addx", TILEGX_OPC_SHL2ADDX, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051140000ULL, + 0x2842000000000000ULL, + 0x0000000060080000ULL, + 0x3404000000000000ULL, + -1ULL + } +#endif + }, + { "shl3add", TILEGX_OPC_SHL3ADD, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051200000ULL, + 0x2848000000000000ULL, + 0x0000000030080000ULL, + 0x1c04000000000000ULL, + -1ULL + } +#endif + }, + { "shl3addx", TILEGX_OPC_SHL3ADDX, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000511c0000ULL, + 0x2846000000000000ULL, + 0x00000000600c0000ULL, + 0x3406000000000000ULL, + -1ULL + } +#endif + }, + { "shli", TILEGX_OPC_SHLI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000060080000ULL, + 0x3004000000000000ULL, + 0x0000000078040000ULL, + 0x3802000000000000ULL, + -1ULL + } +#endif + }, + { "shlx", TILEGX_OPC_SHLX, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051240000ULL, + 0x284a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "shlxi", TILEGX_OPC_SHLXI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000600c0000ULL, + 0x3006000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "shrs", TILEGX_OPC_SHRS, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x00000000512c0000ULL, + 0x284e000000000000ULL, + 0x0000000058080000ULL, + 0x3004000000000000ULL, + -1ULL + } +#endif + }, + { "shrsi", TILEGX_OPC_SHRSI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000060100000ULL, + 0x3008000000000000ULL, + 0x0000000078080000ULL, + 0x3804000000000000ULL, + -1ULL + } +#endif + }, + { "shru", TILEGX_OPC_SHRU, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051340000ULL, + 0x2852000000000000ULL, + 0x00000000580c0000ULL, + 0x3006000000000000ULL, + -1ULL + } +#endif + }, + { "shrui", TILEGX_OPC_SHRUI, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 10, 11, 31 }, { 12, 13, 32 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000060140000ULL, + 0x300a000000000000ULL, + 0x00000000780c0000ULL, + 0x3806000000000000ULL, + -1ULL + } +#endif + }, + { "shrux", TILEGX_OPC_SHRUX, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051300000ULL, + 0x2850000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "shruxi", TILEGX_OPC_SHRUXI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000060180000ULL, + 0x300c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "shufflebytes", TILEGX_OPC_SHUFFLEBYTES, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051380000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "st", TILEGX_OPC_ST, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x2862000000000000ULL, + -1ULL, + -1ULL, + 0xc200000004000000ULL + } +#endif + }, + { "st1", TILEGX_OPC_ST1, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x2854000000000000ULL, + -1ULL, + -1ULL, + 0xc000000000000000ULL + } +#endif + }, + { "st1_add", TILEGX_OPC_ST1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18c8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "st2", TILEGX_OPC_ST2, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x2856000000000000ULL, + -1ULL, + -1ULL, + 0xc000000004000000ULL + } +#endif + }, + { "st2_add", TILEGX_OPC_ST2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18d0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "st4", TILEGX_OPC_ST4, 0x12, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 14, 33 } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0xc200000004000000ULL + }, + { + -1ULL, + 0x2858000000000000ULL, + -1ULL, + -1ULL, + 0xc200000000000000ULL + } +#endif + }, + { "st4_add", TILEGX_OPC_ST4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18d8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "st_add", TILEGX_OPC_ST_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x1900000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt", TILEGX_OPC_STNT, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x2860000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt1", TILEGX_OPC_STNT1, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x285a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt1_add", TILEGX_OPC_STNT1_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18e0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt2", TILEGX_OPC_STNT2, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x285c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt2_add", TILEGX_OPC_STNT2_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18e8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt4", TILEGX_OPC_STNT4, 0x2, 2, TREG_ZERO, 1, + { { 0, }, { 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x285e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt4_add", TILEGX_OPC_STNT4_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18f0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "stnt_add", TILEGX_OPC_STNT_ADD, 0x2, 3, TREG_ZERO, 1, + { { 0, }, { 15, 17, 34 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x18f8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "sub", TILEGX_OPC_SUB, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051440000ULL, + 0x2868000000000000ULL, + 0x00000000280c0000ULL, + 0x1806000000000000ULL, + -1ULL + } +#endif + }, + { "subx", TILEGX_OPC_SUBX, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000051400000ULL, + 0x2866000000000000ULL, + 0x0000000028080000ULL, + 0x1804000000000000ULL, + -1ULL + } +#endif + }, + { "subxsc", TILEGX_OPC_SUBXSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000513c0000ULL, + 0x2864000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "swint0", TILEGX_OPC_SWINT0, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b100000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "swint1", TILEGX_OPC_SWINT1, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b180000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "swint2", TILEGX_OPC_SWINT2, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b200000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "swint3", TILEGX_OPC_SWINT3, 0x2, 0, TREG_ZERO, 0, + { { 0, }, { }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b280000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "tblidxb0", TILEGX_OPC_TBLIDXB0, 0x5, 2, TREG_ZERO, 1, + { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051489000ULL, + -1ULL, + 0x00000000300c9000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "tblidxb1", TILEGX_OPC_TBLIDXB1, 0x5, 2, TREG_ZERO, 1, + { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x000000005148a000ULL, + -1ULL, + 0x00000000300ca000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "tblidxb2", TILEGX_OPC_TBLIDXB2, 0x5, 2, TREG_ZERO, 1, + { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x000000005148b000ULL, + -1ULL, + 0x00000000300cb000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "tblidxb3", TILEGX_OPC_TBLIDXB3, 0x5, 2, TREG_ZERO, 1, + { { 23, 9 }, { 0, }, { 24, 11 }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffff000ULL, + 0ULL, + 0x00000000780ff000ULL, + 0ULL, + 0ULL + }, + { + 0x000000005148c000ULL, + -1ULL, + 0x00000000300cc000ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1add", TILEGX_OPC_V1ADD, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051500000ULL, + 0x286e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1addi", TILEGX_OPC_V1ADDI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040800000ULL, + 0x1908000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1adduc", TILEGX_OPC_V1ADDUC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000514c0000ULL, + 0x286c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1adiffu", TILEGX_OPC_V1ADIFFU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051540000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1avgu", TILEGX_OPC_V1AVGU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051580000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpeq", TILEGX_OPC_V1CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000515c0000ULL, + 0x2870000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpeqi", TILEGX_OPC_V1CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040900000ULL, + 0x1910000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmples", TILEGX_OPC_V1CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051600000ULL, + 0x2872000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpleu", TILEGX_OPC_V1CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051640000ULL, + 0x2874000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmplts", TILEGX_OPC_V1CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051680000ULL, + 0x2876000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpltsi", TILEGX_OPC_V1CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040a00000ULL, + 0x1918000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpltu", TILEGX_OPC_V1CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000516c0000ULL, + 0x2878000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpltui", TILEGX_OPC_V1CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040b00000ULL, + 0x1920000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1cmpne", TILEGX_OPC_V1CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051700000ULL, + 0x287a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1ddotpu", TILEGX_OPC_V1DDOTPU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052880000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1ddotpua", TILEGX_OPC_V1DDOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052840000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1ddotpus", TILEGX_OPC_V1DDOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051780000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1ddotpusa", TILEGX_OPC_V1DDOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051740000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotp", TILEGX_OPC_V1DOTP, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051880000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotpa", TILEGX_OPC_V1DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000517c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotpu", TILEGX_OPC_V1DOTPU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052900000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotpua", TILEGX_OPC_V1DOTPUA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000528c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotpus", TILEGX_OPC_V1DOTPUS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051840000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1dotpusa", TILEGX_OPC_V1DOTPUSA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051800000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1int_h", TILEGX_OPC_V1INT_H, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000518c0000ULL, + 0x287c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1int_l", TILEGX_OPC_V1INT_L, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051900000ULL, + 0x287e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1maxu", TILEGX_OPC_V1MAXU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051940000ULL, + 0x2880000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1maxui", TILEGX_OPC_V1MAXUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040c00000ULL, + 0x1928000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1minu", TILEGX_OPC_V1MINU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051980000ULL, + 0x2882000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1minui", TILEGX_OPC_V1MINUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040d00000ULL, + 0x1930000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1mnz", TILEGX_OPC_V1MNZ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000519c0000ULL, + 0x2884000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1multu", TILEGX_OPC_V1MULTU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051a00000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1mulu", TILEGX_OPC_V1MULU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051a80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1mulus", TILEGX_OPC_V1MULUS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051a40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1mz", TILEGX_OPC_V1MZ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051ac0000ULL, + 0x2886000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1sadau", TILEGX_OPC_V1SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051b00000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1sadu", TILEGX_OPC_V1SADU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051b40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shl", TILEGX_OPC_V1SHL, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051b80000ULL, + 0x2888000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shli", TILEGX_OPC_V1SHLI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000601c0000ULL, + 0x300e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shrs", TILEGX_OPC_V1SHRS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051bc0000ULL, + 0x288a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shrsi", TILEGX_OPC_V1SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000060200000ULL, + 0x3010000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shru", TILEGX_OPC_V1SHRU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051c00000ULL, + 0x288c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1shrui", TILEGX_OPC_V1SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000060240000ULL, + 0x3012000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1sub", TILEGX_OPC_V1SUB, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051c80000ULL, + 0x2890000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v1subuc", TILEGX_OPC_V1SUBUC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051c40000ULL, + 0x288e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2add", TILEGX_OPC_V2ADD, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051d00000ULL, + 0x2894000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2addi", TILEGX_OPC_V2ADDI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040e00000ULL, + 0x1938000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2addsc", TILEGX_OPC_V2ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051cc0000ULL, + 0x2892000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2adiffs", TILEGX_OPC_V2ADIFFS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051d40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2avgs", TILEGX_OPC_V2AVGS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051d80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpeq", TILEGX_OPC_V2CMPEQ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051dc0000ULL, + 0x2896000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpeqi", TILEGX_OPC_V2CMPEQI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000040f00000ULL, + 0x1940000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmples", TILEGX_OPC_V2CMPLES, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051e00000ULL, + 0x2898000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpleu", TILEGX_OPC_V2CMPLEU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051e40000ULL, + 0x289a000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmplts", TILEGX_OPC_V2CMPLTS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051e80000ULL, + 0x289c000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpltsi", TILEGX_OPC_V2CMPLTSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000041000000ULL, + 0x1948000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpltu", TILEGX_OPC_V2CMPLTU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051ec0000ULL, + 0x289e000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpltui", TILEGX_OPC_V2CMPLTUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000041100000ULL, + 0x1950000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2cmpne", TILEGX_OPC_V2CMPNE, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051f00000ULL, + 0x28a0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2dotp", TILEGX_OPC_V2DOTP, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051f80000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2dotpa", TILEGX_OPC_V2DOTPA, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051f40000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2int_h", TILEGX_OPC_V2INT_H, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000051fc0000ULL, + 0x28a2000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2int_l", TILEGX_OPC_V2INT_L, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052000000ULL, + 0x28a4000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2maxs", TILEGX_OPC_V2MAXS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052040000ULL, + 0x28a6000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2maxsi", TILEGX_OPC_V2MAXSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000041200000ULL, + 0x1958000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2mins", TILEGX_OPC_V2MINS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052080000ULL, + 0x28a8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2minsi", TILEGX_OPC_V2MINSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000041300000ULL, + 0x1960000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2mnz", TILEGX_OPC_V2MNZ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000520c0000ULL, + 0x28aa000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2mulfsc", TILEGX_OPC_V2MULFSC, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052100000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2muls", TILEGX_OPC_V2MULS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052140000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2mults", TILEGX_OPC_V2MULTS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052180000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2mz", TILEGX_OPC_V2MZ, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000521c0000ULL, + 0x28ac000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2packh", TILEGX_OPC_V2PACKH, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052200000ULL, + 0x28ae000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2packl", TILEGX_OPC_V2PACKL, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052240000ULL, + 0x28b0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2packuc", TILEGX_OPC_V2PACKUC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052280000ULL, + 0x28b2000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2sadas", TILEGX_OPC_V2SADAS, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000522c0000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2sadau", TILEGX_OPC_V2SADAU, 0x1, 3, TREG_ZERO, 1, + { { 23, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052300000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2sads", TILEGX_OPC_V2SADS, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052340000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2sadu", TILEGX_OPC_V2SADU, 0x1, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 0, }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052380000ULL, + -1ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shl", TILEGX_OPC_V2SHL, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052400000ULL, + 0x28b6000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shli", TILEGX_OPC_V2SHLI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000060280000ULL, + 0x3014000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shlsc", TILEGX_OPC_V2SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000523c0000ULL, + 0x28b4000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shrs", TILEGX_OPC_V2SHRS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052440000ULL, + 0x28b8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shrsi", TILEGX_OPC_V2SHRSI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000602c0000ULL, + 0x3016000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shru", TILEGX_OPC_V2SHRU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052480000ULL, + 0x28ba000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2shrui", TILEGX_OPC_V2SHRUI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 29 }, { 6, 7, 30 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000060300000ULL, + 0x3018000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2sub", TILEGX_OPC_V2SUB, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052500000ULL, + 0x28be000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v2subsc", TILEGX_OPC_V2SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000524c0000ULL, + 0x28bc000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4add", TILEGX_OPC_V4ADD, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052580000ULL, + 0x28c2000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4addsc", TILEGX_OPC_V4ADDSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052540000ULL, + 0x28c0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4int_h", TILEGX_OPC_V4INT_H, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000525c0000ULL, + 0x28c4000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4int_l", TILEGX_OPC_V4INT_L, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052600000ULL, + 0x28c6000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4packsc", TILEGX_OPC_V4PACKSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052640000ULL, + 0x28c8000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4shl", TILEGX_OPC_V4SHL, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000526c0000ULL, + 0x28cc000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4shlsc", TILEGX_OPC_V4SHLSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052680000ULL, + 0x28ca000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4shrs", TILEGX_OPC_V4SHRS, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052700000ULL, + 0x28ce000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4shru", TILEGX_OPC_V4SHRU, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052740000ULL, + 0x28d0000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4sub", TILEGX_OPC_V4SUB, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x00000000527c0000ULL, + 0x28d4000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "v4subsc", TILEGX_OPC_V4SUBSC, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000052780000ULL, + 0x28d2000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "wh64", TILEGX_OPC_WH64, 0x2, 1, TREG_ZERO, 1, + { { 0, }, { 7 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0ULL, + 0xfffff80000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + -1ULL, + 0x286b300000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { "xor", TILEGX_OPC_XOR, 0xf, 3, TREG_ZERO, 1, + { { 8, 9, 16 }, { 6, 7, 17 }, { 10, 11, 18 }, { 12, 13, 19 }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ffc0000ULL, + 0xfffe000000000000ULL, + 0x00000000780c0000ULL, + 0x3c06000000000000ULL, + 0ULL + }, + { + 0x0000000052800000ULL, + 0x28d6000000000000ULL, + 0x00000000500c0000ULL, + 0x2c06000000000000ULL, + -1ULL + } +#endif + }, + { "xori", TILEGX_OPC_XORI, 0x3, 3, TREG_ZERO, 1, + { { 8, 9, 0 }, { 6, 7, 1 }, { 0, }, { 0, }, { 0, } }, +#ifndef DISASM_ONLY + { + 0xc00000007ff00000ULL, + 0xfff8000000000000ULL, + 0ULL, + 0ULL, + 0ULL + }, + { + 0x0000000041400000ULL, + 0x1968000000000000ULL, + -1ULL, + -1ULL, + -1ULL + } +#endif + }, + { NULL, TILEGX_OPC_NONE, 0, 0, TREG_ZERO, 0, { { 0, } }, +#ifndef DISASM_ONLY + { 0, }, { 0, } +#endif + } +}; + +#define BITFIELD(start, size) ((start) | (((1 << (size)) - 1) << 6)) +#define CHILD(array_index) (TILEGX_OPC_NONE + (array_index)) + +static const unsigned short decode_X0_fsm[936] = +{ + BITFIELD(22, 9) /* index 0 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BFEXTS, + TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTS, TILEGX_OPC_BFEXTU, + TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFEXTU, TILEGX_OPC_BFINS, + TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_BFINS, TILEGX_OPC_MM, + TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_MM, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(528), CHILD(578), + CHILD(583), CHILD(588), CHILD(593), CHILD(598), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(603), CHILD(620), CHILD(637), CHILD(654), CHILD(671), + CHILD(703), CHILD(797), CHILD(814), CHILD(831), CHILD(848), CHILD(865), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(889), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + CHILD(906), CHILD(906), CHILD(906), CHILD(906), CHILD(906), + BITFIELD(6, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(8, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(10, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(20, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(6, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(8, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(10, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(2, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(4, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(6, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(8, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(10, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(20, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, TILEGX_OPC_ORI, + BITFIELD(20, 2) /* index 583 */, + TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, TILEGX_OPC_V1CMPLTSI, + TILEGX_OPC_V1CMPLTUI, + BITFIELD(20, 2) /* index 588 */, + TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, TILEGX_OPC_V2ADDI, + TILEGX_OPC_V2CMPEQI, + BITFIELD(20, 2) /* index 593 */, + TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, TILEGX_OPC_V2MAXSI, + TILEGX_OPC_V2MINSI, + BITFIELD(20, 2) /* index 598 */, + TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 603 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_CMPEQ, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_CMULAF, TILEGX_OPC_CMULA, TILEGX_OPC_CMULFR, + BITFIELD(18, 4) /* index 620 */, + TILEGX_OPC_CMULF, TILEGX_OPC_CMULHR, TILEGX_OPC_CMULH, TILEGX_OPC_CMUL, + TILEGX_OPC_CRC32_32, TILEGX_OPC_CRC32_8, TILEGX_OPC_DBLALIGN2, + TILEGX_OPC_DBLALIGN4, TILEGX_OPC_DBLALIGN6, TILEGX_OPC_DBLALIGN, + TILEGX_OPC_FDOUBLE_ADDSUB, TILEGX_OPC_FDOUBLE_ADD_FLAGS, + TILEGX_OPC_FDOUBLE_MUL_FLAGS, TILEGX_OPC_FDOUBLE_PACK1, + TILEGX_OPC_FDOUBLE_PACK2, TILEGX_OPC_FDOUBLE_SUB_FLAGS, + BITFIELD(18, 4) /* index 637 */, + TILEGX_OPC_FDOUBLE_UNPACK_MAX, TILEGX_OPC_FDOUBLE_UNPACK_MIN, + TILEGX_OPC_FSINGLE_ADD1, TILEGX_OPC_FSINGLE_ADDSUB2, + TILEGX_OPC_FSINGLE_MUL1, TILEGX_OPC_FSINGLE_MUL2, TILEGX_OPC_FSINGLE_PACK2, + TILEGX_OPC_FSINGLE_SUB1, TILEGX_OPC_MNZ, TILEGX_OPC_MULAX, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HS_HU, TILEGX_OPC_MULA_HS_LS, + TILEGX_OPC_MULA_HS_LU, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_HU_LS, + BITFIELD(18, 4) /* index 654 */, + TILEGX_OPC_MULA_HU_LU, TILEGX_OPC_MULA_LS_LS, TILEGX_OPC_MULA_LS_LU, + TILEGX_OPC_MULA_LU_LU, TILEGX_OPC_MULX, TILEGX_OPC_MUL_HS_HS, + TILEGX_OPC_MUL_HS_HU, TILEGX_OPC_MUL_HS_LS, TILEGX_OPC_MUL_HS_LU, + TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_HU_LS, TILEGX_OPC_MUL_HU_LU, + TILEGX_OPC_MUL_LS_LS, TILEGX_OPC_MUL_LS_LU, TILEGX_OPC_MUL_LU_LU, + TILEGX_OPC_MZ, + BITFIELD(18, 4) /* index 671 */, + TILEGX_OPC_NOR, CHILD(688), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_SHUFFLEBYTES, + TILEGX_OPC_SUBXSC, + BITFIELD(12, 2) /* index 688 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(693), + BITFIELD(14, 2) /* index 693 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(698), + BITFIELD(16, 2) /* index 698 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 4) /* index 703 */, + TILEGX_OPC_SUBX, TILEGX_OPC_SUB, CHILD(720), TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADIFFU, TILEGX_OPC_V1AVGU, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1DDOTPUSA, TILEGX_OPC_V1DDOTPUS, TILEGX_OPC_V1DOTPA, + BITFIELD(12, 4) /* index 720 */, + TILEGX_OPC_NONE, CHILD(737), CHILD(742), CHILD(747), CHILD(752), CHILD(757), + CHILD(762), CHILD(767), CHILD(772), CHILD(777), CHILD(782), CHILD(787), + CHILD(792), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 737 */, + TILEGX_OPC_CLZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 742 */, + TILEGX_OPC_CTZ, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 747 */, + TILEGX_OPC_FNOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 752 */, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 757 */, + TILEGX_OPC_NOP, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 762 */, + TILEGX_OPC_PCNT, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 767 */, + TILEGX_OPC_REVBITS, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 772 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 777 */, + TILEGX_OPC_TBLIDXB0, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 782 */, + TILEGX_OPC_TBLIDXB1, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 787 */, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(16, 2) /* index 792 */, + TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 797 */, + TILEGX_OPC_V1DOTPUSA, TILEGX_OPC_V1DOTPUS, TILEGX_OPC_V1DOTP, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1MAXU, + TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MULTU, TILEGX_OPC_V1MULUS, + TILEGX_OPC_V1MULU, TILEGX_OPC_V1MZ, TILEGX_OPC_V1SADAU, TILEGX_OPC_V1SADU, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, + BITFIELD(18, 4) /* index 814 */, + TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, + TILEGX_OPC_V2ADD, TILEGX_OPC_V2ADIFFS, TILEGX_OPC_V2AVGS, + TILEGX_OPC_V2CMPEQ, TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, + TILEGX_OPC_V2CMPLTS, TILEGX_OPC_V2CMPLTU, TILEGX_OPC_V2CMPNE, + TILEGX_OPC_V2DOTPA, TILEGX_OPC_V2DOTP, TILEGX_OPC_V2INT_H, + BITFIELD(18, 4) /* index 831 */, + TILEGX_OPC_V2INT_L, TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, + TILEGX_OPC_V2MULFSC, TILEGX_OPC_V2MULS, TILEGX_OPC_V2MULTS, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SADAS, TILEGX_OPC_V2SADAU, TILEGX_OPC_V2SADS, + TILEGX_OPC_V2SADU, TILEGX_OPC_V2SHLSC, + BITFIELD(18, 4) /* index 848 */, + TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, TILEGX_OPC_V2SUBSC, + TILEGX_OPC_V2SUB, TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, + BITFIELD(18, 3) /* index 865 */, + CHILD(874), CHILD(877), CHILD(880), CHILD(883), CHILD(886), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 874 */, + TILEGX_OPC_XOR, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 877 */, + TILEGX_OPC_V1DDOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 880 */, + TILEGX_OPC_V1DDOTPU, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 883 */, + TILEGX_OPC_V1DOTPUA, TILEGX_OPC_NONE, + BITFIELD(21, 1) /* index 886 */, + TILEGX_OPC_V1DOTPU, TILEGX_OPC_NONE, + BITFIELD(18, 4) /* index 889 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(0, 2) /* index 906 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(911), + BITFIELD(2, 2) /* index 911 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(916), + BITFIELD(4, 2) /* index 916 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(921), + BITFIELD(6, 2) /* index 921 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(926), + BITFIELD(8, 2) /* index 926 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(931), + BITFIELD(10, 2) /* index 931 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_X1_fsm[1266] = +{ + BITFIELD(53, 9) /* index 0 */, + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), CHILD(513), + CHILD(513), CHILD(513), CHILD(513), CHILD(513), TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, + TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_ADDXLI, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_BEQZT, + TILEGX_OPC_BEQZT, TILEGX_OPC_BEQZ, TILEGX_OPC_BEQZ, TILEGX_OPC_BGEZT, + TILEGX_OPC_BGEZT, TILEGX_OPC_BGEZ, TILEGX_OPC_BGEZ, TILEGX_OPC_BGTZT, + TILEGX_OPC_BGTZT, TILEGX_OPC_BGTZ, TILEGX_OPC_BGTZ, TILEGX_OPC_BLBCT, + TILEGX_OPC_BLBCT, TILEGX_OPC_BLBC, TILEGX_OPC_BLBC, TILEGX_OPC_BLBST, + TILEGX_OPC_BLBST, TILEGX_OPC_BLBS, TILEGX_OPC_BLBS, TILEGX_OPC_BLEZT, + TILEGX_OPC_BLEZT, TILEGX_OPC_BLEZ, TILEGX_OPC_BLEZ, TILEGX_OPC_BLTZT, + TILEGX_OPC_BLTZT, TILEGX_OPC_BLTZ, TILEGX_OPC_BLTZ, TILEGX_OPC_BNEZT, + TILEGX_OPC_BNEZT, TILEGX_OPC_BNEZ, TILEGX_OPC_BNEZ, CHILD(528), CHILD(578), + CHILD(598), CHILD(703), CHILD(723), CHILD(728), CHILD(753), CHILD(758), + CHILD(763), CHILD(768), CHILD(773), CHILD(778), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, + TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_JAL, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, TILEGX_OPC_J, + CHILD(783), CHILD(800), CHILD(832), CHILD(849), CHILD(1168), CHILD(1185), + CHILD(1202), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1219), TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), CHILD(1236), + CHILD(1236), + BITFIELD(37, 2) /* index 513 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(518), + BITFIELD(39, 2) /* index 518 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, CHILD(523), + BITFIELD(41, 2) /* index 523 */, + TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_ADDLI, TILEGX_OPC_MOVELI, + BITFIELD(51, 2) /* index 528 */, + TILEGX_OPC_NONE, CHILD(533), TILEGX_OPC_ADDXI, CHILD(548), + BITFIELD(37, 2) /* index 533 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(538), + BITFIELD(39, 2) /* index 538 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(543), + BITFIELD(41, 2) /* index 543 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 548 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(553), + BITFIELD(33, 2) /* index 553 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(558), + BITFIELD(35, 2) /* index 558 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(563), + BITFIELD(37, 2) /* index 563 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(568), + BITFIELD(39, 2) /* index 568 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(573), + BITFIELD(41, 2) /* index 573 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(51, 2) /* index 578 */, + TILEGX_OPC_CMPEQI, TILEGX_OPC_CMPLTSI, TILEGX_OPC_CMPLTUI, CHILD(583), + BITFIELD(31, 2) /* index 583 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(588), + BITFIELD(33, 2) /* index 588 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, CHILD(593), + BITFIELD(35, 2) /* index 593 */, + TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, TILEGX_OPC_LD1S_ADD, + TILEGX_OPC_PREFETCH_ADD_L1_FAULT, + BITFIELD(51, 2) /* index 598 */, + CHILD(603), CHILD(618), CHILD(633), CHILD(648), + BITFIELD(31, 2) /* index 603 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(608), + BITFIELD(33, 2) /* index 608 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, CHILD(613), + BITFIELD(35, 2) /* index 613 */, + TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, TILEGX_OPC_LD1U_ADD, + TILEGX_OPC_PREFETCH_ADD_L1, + BITFIELD(31, 2) /* index 618 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(623), + BITFIELD(33, 2) /* index 623 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, CHILD(628), + BITFIELD(35, 2) /* index 628 */, + TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, TILEGX_OPC_LD2S_ADD, + TILEGX_OPC_PREFETCH_ADD_L2_FAULT, + BITFIELD(31, 2) /* index 633 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(638), + BITFIELD(33, 2) /* index 638 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, CHILD(643), + BITFIELD(35, 2) /* index 643 */, + TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, TILEGX_OPC_LD2U_ADD, + TILEGX_OPC_PREFETCH_ADD_L2, + BITFIELD(31, 2) /* index 648 */, + CHILD(653), CHILD(653), CHILD(653), CHILD(673), + BITFIELD(43, 2) /* index 653 */, + CHILD(658), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + BITFIELD(45, 2) /* index 658 */, + CHILD(663), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + BITFIELD(47, 2) /* index 663 */, + CHILD(668), TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + BITFIELD(49, 2) /* index 668 */, + TILEGX_OPC_LD4S_TLS, TILEGX_OPC_LD4S_ADD, TILEGX_OPC_LD4S_ADD, + TILEGX_OPC_LD4S_ADD, + BITFIELD(33, 2) /* index 673 */, + CHILD(653), CHILD(653), CHILD(653), CHILD(678), + BITFIELD(35, 2) /* index 678 */, + CHILD(653), CHILD(653), CHILD(653), CHILD(683), + BITFIELD(43, 2) /* index 683 */, + CHILD(688), TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(45, 2) /* index 688 */, + CHILD(693), TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(47, 2) /* index 693 */, + CHILD(698), TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(49, 2) /* index 698 */, + TILEGX_OPC_LD4S_TLS, TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + TILEGX_OPC_PREFETCH_ADD_L3_FAULT, TILEGX_OPC_PREFETCH_ADD_L3_FAULT, + BITFIELD(51, 2) /* index 703 */, + CHILD(708), TILEGX_OPC_LDNT1S_ADD, TILEGX_OPC_LDNT1U_ADD, + TILEGX_OPC_LDNT2S_ADD, + BITFIELD(31, 2) /* index 708 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(713), + BITFIELD(33, 2) /* index 713 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, CHILD(718), + BITFIELD(35, 2) /* index 718 */, + TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, TILEGX_OPC_LD4U_ADD, + TILEGX_OPC_PREFETCH_ADD_L3, + BITFIELD(51, 2) /* index 723 */, + TILEGX_OPC_LDNT2U_ADD, TILEGX_OPC_LDNT4S_ADD, TILEGX_OPC_LDNT4U_ADD, + TILEGX_OPC_LDNT_ADD, + BITFIELD(51, 2) /* index 728 */, + CHILD(733), TILEGX_OPC_LDNA_ADD, TILEGX_OPC_MFSPR, TILEGX_OPC_MTSPR, + BITFIELD(43, 2) /* index 733 */, + CHILD(738), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, + BITFIELD(45, 2) /* index 738 */, + CHILD(743), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, + BITFIELD(47, 2) /* index 743 */, + CHILD(748), TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, + BITFIELD(49, 2) /* index 748 */, + TILEGX_OPC_LD_TLS, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, TILEGX_OPC_LD_ADD, + BITFIELD(51, 2) /* index 753 */, + TILEGX_OPC_ORI, TILEGX_OPC_ST1_ADD, TILEGX_OPC_ST2_ADD, TILEGX_OPC_ST4_ADD, + BITFIELD(51, 2) /* index 758 */, + TILEGX_OPC_STNT1_ADD, TILEGX_OPC_STNT2_ADD, TILEGX_OPC_STNT4_ADD, + TILEGX_OPC_STNT_ADD, + BITFIELD(51, 2) /* index 763 */, + TILEGX_OPC_ST_ADD, TILEGX_OPC_V1ADDI, TILEGX_OPC_V1CMPEQI, + TILEGX_OPC_V1CMPLTSI, + BITFIELD(51, 2) /* index 768 */, + TILEGX_OPC_V1CMPLTUI, TILEGX_OPC_V1MAXUI, TILEGX_OPC_V1MINUI, + TILEGX_OPC_V2ADDI, + BITFIELD(51, 2) /* index 773 */, + TILEGX_OPC_V2CMPEQI, TILEGX_OPC_V2CMPLTSI, TILEGX_OPC_V2CMPLTUI, + TILEGX_OPC_V2MAXSI, + BITFIELD(51, 2) /* index 778 */, + TILEGX_OPC_V2MINSI, TILEGX_OPC_XORI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 783 */, + TILEGX_OPC_NONE, TILEGX_OPC_ADDXSC, TILEGX_OPC_ADDX, TILEGX_OPC_ADD, + TILEGX_OPC_AND, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPEXCH4, TILEGX_OPC_CMPEXCH, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + TILEGX_OPC_CMPNE, TILEGX_OPC_DBLALIGN2, TILEGX_OPC_DBLALIGN4, + TILEGX_OPC_DBLALIGN6, + BITFIELD(49, 4) /* index 800 */, + TILEGX_OPC_EXCH4, TILEGX_OPC_EXCH, TILEGX_OPC_FETCHADD4, + TILEGX_OPC_FETCHADDGEZ4, TILEGX_OPC_FETCHADDGEZ, TILEGX_OPC_FETCHADD, + TILEGX_OPC_FETCHAND4, TILEGX_OPC_FETCHAND, TILEGX_OPC_FETCHOR4, + TILEGX_OPC_FETCHOR, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, TILEGX_OPC_NOR, + CHILD(817), TILEGX_OPC_ROTL, TILEGX_OPC_SHL1ADDX, + BITFIELD(43, 2) /* index 817 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(822), + BITFIELD(45, 2) /* index 822 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(827), + BITFIELD(47, 2) /* index 827 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 4) /* index 832 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADDX, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL3ADDX, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHLX, TILEGX_OPC_SHL, + TILEGX_OPC_SHRS, TILEGX_OPC_SHRUX, TILEGX_OPC_SHRU, TILEGX_OPC_ST1, + TILEGX_OPC_ST2, TILEGX_OPC_ST4, TILEGX_OPC_STNT1, TILEGX_OPC_STNT2, + TILEGX_OPC_STNT4, + BITFIELD(46, 7) /* index 849 */, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, TILEGX_OPC_STNT, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, + TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_ST, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, + TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBXSC, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, + TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, + TILEGX_OPC_SUB, TILEGX_OPC_SUB, TILEGX_OPC_SUB, CHILD(978), CHILD(987), + CHILD(1066), CHILD(1150), CHILD(1159), TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, + TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADDUC, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, + TILEGX_OPC_V1ADD, TILEGX_OPC_V1ADD, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, TILEGX_OPC_V1CMPEQ, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, + TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLES, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLEU, + TILEGX_OPC_V1CMPLEU, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, TILEGX_OPC_V1CMPLTS, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, + TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPLTU, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1CMPNE, + TILEGX_OPC_V1CMPNE, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, TILEGX_OPC_V1INT_H, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + TILEGX_OPC_V1INT_L, TILEGX_OPC_V1INT_L, + BITFIELD(43, 3) /* index 978 */, + TILEGX_OPC_NONE, TILEGX_OPC_DRAIN, TILEGX_OPC_DTLBPR, TILEGX_OPC_FINV, + TILEGX_OPC_FLUSHWB, TILEGX_OPC_FLUSH, TILEGX_OPC_FNOP, TILEGX_OPC_ICOH, + BITFIELD(43, 3) /* index 987 */, + CHILD(996), TILEGX_OPC_INV, TILEGX_OPC_IRET, TILEGX_OPC_JALRP, + TILEGX_OPC_JALR, TILEGX_OPC_JRP, TILEGX_OPC_JR, CHILD(1051), + BITFIELD(31, 2) /* index 996 */, + CHILD(1001), CHILD(1026), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 1001 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1006), + BITFIELD(35, 2) /* index 1006 */, + TILEGX_OPC_ILL, CHILD(1011), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 1011 */, + TILEGX_OPC_ILL, CHILD(1016), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 1016 */, + TILEGX_OPC_ILL, CHILD(1021), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 1021 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_BPT, TILEGX_OPC_ILL, + BITFIELD(33, 2) /* index 1026 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_ILL, CHILD(1031), + BITFIELD(35, 2) /* index 1031 */, + TILEGX_OPC_ILL, CHILD(1036), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(37, 2) /* index 1036 */, + TILEGX_OPC_ILL, CHILD(1041), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(39, 2) /* index 1041 */, + TILEGX_OPC_ILL, CHILD(1046), TILEGX_OPC_ILL, TILEGX_OPC_ILL, + BITFIELD(41, 2) /* index 1046 */, + TILEGX_OPC_ILL, TILEGX_OPC_ILL, TILEGX_OPC_RAISE, TILEGX_OPC_ILL, + BITFIELD(31, 2) /* index 1051 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1056), + BITFIELD(33, 2) /* index 1056 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(1061), + BITFIELD(35, 2) /* index 1061 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(43, 3) /* index 1066 */, + CHILD(1075), CHILD(1090), CHILD(1105), CHILD(1120), CHILD(1135), + TILEGX_OPC_LDNA, TILEGX_OPC_LDNT1S, TILEGX_OPC_LDNT1U, + BITFIELD(31, 2) /* index 1075 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1080), + BITFIELD(33, 2) /* index 1080 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(1085), + BITFIELD(35, 2) /* index 1085 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(31, 2) /* index 1090 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1095), + BITFIELD(33, 2) /* index 1095 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(1100), + BITFIELD(35, 2) /* index 1100 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(31, 2) /* index 1105 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1110), + BITFIELD(33, 2) /* index 1110 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(1115), + BITFIELD(35, 2) /* index 1115 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(31, 2) /* index 1120 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1125), + BITFIELD(33, 2) /* index 1125 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(1130), + BITFIELD(35, 2) /* index 1130 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, + TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(31, 2) /* index 1135 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1140), + BITFIELD(33, 2) /* index 1140 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(1145), + BITFIELD(35, 2) /* index 1145 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(43, 3) /* index 1150 */, + TILEGX_OPC_LDNT2S, TILEGX_OPC_LDNT2U, TILEGX_OPC_LDNT4S, TILEGX_OPC_LDNT4U, + TILEGX_OPC_LDNT, TILEGX_OPC_LD, TILEGX_OPC_LNK, TILEGX_OPC_MF, + BITFIELD(43, 3) /* index 1159 */, + TILEGX_OPC_NAP, TILEGX_OPC_NOP, TILEGX_OPC_SWINT0, TILEGX_OPC_SWINT1, + TILEGX_OPC_SWINT2, TILEGX_OPC_SWINT3, TILEGX_OPC_WH64, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1168 */, + TILEGX_OPC_V1MAXU, TILEGX_OPC_V1MINU, TILEGX_OPC_V1MNZ, TILEGX_OPC_V1MZ, + TILEGX_OPC_V1SHL, TILEGX_OPC_V1SHRS, TILEGX_OPC_V1SHRU, TILEGX_OPC_V1SUBUC, + TILEGX_OPC_V1SUB, TILEGX_OPC_V2ADDSC, TILEGX_OPC_V2ADD, TILEGX_OPC_V2CMPEQ, + TILEGX_OPC_V2CMPLES, TILEGX_OPC_V2CMPLEU, TILEGX_OPC_V2CMPLTS, + TILEGX_OPC_V2CMPLTU, + BITFIELD(49, 4) /* index 1185 */, + TILEGX_OPC_V2CMPNE, TILEGX_OPC_V2INT_H, TILEGX_OPC_V2INT_L, + TILEGX_OPC_V2MAXS, TILEGX_OPC_V2MINS, TILEGX_OPC_V2MNZ, TILEGX_OPC_V2MZ, + TILEGX_OPC_V2PACKH, TILEGX_OPC_V2PACKL, TILEGX_OPC_V2PACKUC, + TILEGX_OPC_V2SHLSC, TILEGX_OPC_V2SHL, TILEGX_OPC_V2SHRS, TILEGX_OPC_V2SHRU, + TILEGX_OPC_V2SUBSC, TILEGX_OPC_V2SUB, + BITFIELD(49, 4) /* index 1202 */, + TILEGX_OPC_V4ADDSC, TILEGX_OPC_V4ADD, TILEGX_OPC_V4INT_H, + TILEGX_OPC_V4INT_L, TILEGX_OPC_V4PACKSC, TILEGX_OPC_V4SHLSC, + TILEGX_OPC_V4SHL, TILEGX_OPC_V4SHRS, TILEGX_OPC_V4SHRU, TILEGX_OPC_V4SUBSC, + TILEGX_OPC_V4SUB, TILEGX_OPC_XOR, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(49, 4) /* index 1219 */, + TILEGX_OPC_NONE, TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHLXI, + TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, TILEGX_OPC_SHRUXI, TILEGX_OPC_V1SHLI, + TILEGX_OPC_V1SHRSI, TILEGX_OPC_V1SHRUI, TILEGX_OPC_V2SHLI, + TILEGX_OPC_V2SHRSI, TILEGX_OPC_V2SHRUI, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(31, 2) /* index 1236 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1241), + BITFIELD(33, 2) /* index 1241 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1246), + BITFIELD(35, 2) /* index 1246 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1251), + BITFIELD(37, 2) /* index 1251 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1256), + BITFIELD(39, 2) /* index 1256 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + CHILD(1261), + BITFIELD(41, 2) /* index 1261 */, + TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, TILEGX_OPC_SHL16INSLI, + TILEGX_OPC_INFOL, +}; + +static const unsigned short decode_Y0_fsm[178] = +{ + BITFIELD(27, 4) /* index 0 */, + CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(118), CHILD(123), + CHILD(128), CHILD(133), CHILD(153), CHILD(158), CHILD(163), CHILD(168), + CHILD(173), + BITFIELD(6, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(8, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(10, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(0, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(2, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(4, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(6, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(8, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(10, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(18, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(15, 5) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(100), + CHILD(109), TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(12, 3) /* index 100 */, + TILEGX_OPC_NONE, TILEGX_OPC_CLZ, TILEGX_OPC_CTZ, TILEGX_OPC_FNOP, + TILEGX_OPC_FSINGLE_PACK1, TILEGX_OPC_NOP, TILEGX_OPC_PCNT, + TILEGX_OPC_REVBITS, + BITFIELD(12, 3) /* index 109 */, + TILEGX_OPC_REVBYTES, TILEGX_OPC_TBLIDXB0, TILEGX_OPC_TBLIDXB1, + TILEGX_OPC_TBLIDXB2, TILEGX_OPC_TBLIDXB3, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + TILEGX_OPC_NONE, + BITFIELD(18, 2) /* index 118 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(18, 2) /* index 123 */, + TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, TILEGX_OPC_MULAX, TILEGX_OPC_MULX, + BITFIELD(18, 2) /* index 128 */, + TILEGX_OPC_CMOVEQZ, TILEGX_OPC_CMOVNEZ, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(18, 2) /* index 133 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(138), TILEGX_OPC_XOR, + BITFIELD(12, 2) /* index 138 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(143), + BITFIELD(14, 2) /* index 143 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(148), + BITFIELD(16, 2) /* index 148 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(18, 2) /* index 153 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(18, 2) /* index 158 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(18, 2) /* index 163 */, + TILEGX_OPC_MUL_HS_HS, TILEGX_OPC_MUL_HU_HU, TILEGX_OPC_MUL_LS_LS, + TILEGX_OPC_MUL_LU_LU, + BITFIELD(18, 2) /* index 168 */, + TILEGX_OPC_MULA_HS_HS, TILEGX_OPC_MULA_HU_HU, TILEGX_OPC_MULA_LS_LS, + TILEGX_OPC_MULA_LU_LU, + BITFIELD(18, 2) /* index 173 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y1_fsm[167] = +{ + BITFIELD(58, 4) /* index 0 */, + TILEGX_OPC_NONE, CHILD(17), TILEGX_OPC_ADDXI, CHILD(32), TILEGX_OPC_CMPEQI, + TILEGX_OPC_CMPLTSI, CHILD(62), CHILD(67), CHILD(117), CHILD(122), + CHILD(127), CHILD(132), CHILD(152), CHILD(157), CHILD(162), TILEGX_OPC_NONE, + BITFIELD(37, 2) /* index 17 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(22), + BITFIELD(39, 2) /* index 22 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, CHILD(27), + BITFIELD(41, 2) /* index 27 */, + TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_ADDI, TILEGX_OPC_MOVEI, + BITFIELD(31, 2) /* index 32 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(37), + BITFIELD(33, 2) /* index 37 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(42), + BITFIELD(35, 2) /* index 42 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(47), + BITFIELD(37, 2) /* index 47 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(52), + BITFIELD(39, 2) /* index 52 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, CHILD(57), + BITFIELD(41, 2) /* index 57 */, + TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_ANDI, TILEGX_OPC_INFO, + BITFIELD(49, 2) /* index 62 */, + TILEGX_OPC_ADDX, TILEGX_OPC_ADD, TILEGX_OPC_SUBX, TILEGX_OPC_SUB, + BITFIELD(47, 4) /* index 67 */, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL1ADD, + TILEGX_OPC_SHL1ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, + TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL2ADD, TILEGX_OPC_SHL3ADD, + TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, TILEGX_OPC_SHL3ADD, CHILD(84), + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_NONE, + BITFIELD(43, 3) /* index 84 */, + CHILD(93), CHILD(96), CHILD(99), CHILD(102), CHILD(105), CHILD(108), + CHILD(111), CHILD(114), + BITFIELD(46, 1) /* index 93 */, + TILEGX_OPC_NONE, TILEGX_OPC_FNOP, + BITFIELD(46, 1) /* index 96 */, + TILEGX_OPC_NONE, TILEGX_OPC_ILL, + BITFIELD(46, 1) /* index 99 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALRP, + BITFIELD(46, 1) /* index 102 */, + TILEGX_OPC_NONE, TILEGX_OPC_JALR, + BITFIELD(46, 1) /* index 105 */, + TILEGX_OPC_NONE, TILEGX_OPC_JRP, + BITFIELD(46, 1) /* index 108 */, + TILEGX_OPC_NONE, TILEGX_OPC_JR, + BITFIELD(46, 1) /* index 111 */, + TILEGX_OPC_NONE, TILEGX_OPC_LNK, + BITFIELD(46, 1) /* index 114 */, + TILEGX_OPC_NONE, TILEGX_OPC_NOP, + BITFIELD(49, 2) /* index 117 */, + TILEGX_OPC_CMPLES, TILEGX_OPC_CMPLEU, TILEGX_OPC_CMPLTS, TILEGX_OPC_CMPLTU, + BITFIELD(49, 2) /* index 122 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_CMPEQ, TILEGX_OPC_CMPNE, + BITFIELD(49, 2) /* index 127 */, + TILEGX_OPC_NONE, TILEGX_OPC_NONE, TILEGX_OPC_MNZ, TILEGX_OPC_MZ, + BITFIELD(49, 2) /* index 132 */, + TILEGX_OPC_AND, TILEGX_OPC_NOR, CHILD(137), TILEGX_OPC_XOR, + BITFIELD(43, 2) /* index 137 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(142), + BITFIELD(45, 2) /* index 142 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, CHILD(147), + BITFIELD(47, 2) /* index 147 */, + TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_OR, TILEGX_OPC_MOVE, + BITFIELD(49, 2) /* index 152 */, + TILEGX_OPC_ROTL, TILEGX_OPC_SHL, TILEGX_OPC_SHRS, TILEGX_OPC_SHRU, + BITFIELD(49, 2) /* index 157 */, + TILEGX_OPC_NONE, TILEGX_OPC_SHL1ADDX, TILEGX_OPC_SHL2ADDX, + TILEGX_OPC_SHL3ADDX, + BITFIELD(49, 2) /* index 162 */, + TILEGX_OPC_ROTLI, TILEGX_OPC_SHLI, TILEGX_OPC_SHRSI, TILEGX_OPC_SHRUI, +}; + +static const unsigned short decode_Y2_fsm[118] = +{ + BITFIELD(62, 2) /* index 0 */, + TILEGX_OPC_NONE, CHILD(5), CHILD(66), CHILD(109), + BITFIELD(55, 3) /* index 5 */, + CHILD(14), CHILD(14), CHILD(14), CHILD(17), CHILD(40), CHILD(40), CHILD(40), + CHILD(43), + BITFIELD(26, 1) /* index 14 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1U, + BITFIELD(26, 1) /* index 17 */, + CHILD(20), CHILD(30), + BITFIELD(51, 2) /* index 20 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, CHILD(25), + BITFIELD(53, 2) /* index 25 */, + TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, TILEGX_OPC_LD1S, + TILEGX_OPC_PREFETCH_L1_FAULT, + BITFIELD(51, 2) /* index 30 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, CHILD(35), + BITFIELD(53, 2) /* index 35 */, + TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_LD1U, TILEGX_OPC_PREFETCH, + BITFIELD(26, 1) /* index 40 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2U, + BITFIELD(26, 1) /* index 43 */, + CHILD(46), CHILD(56), + BITFIELD(51, 2) /* index 46 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, CHILD(51), + BITFIELD(53, 2) /* index 51 */, + TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, TILEGX_OPC_LD2S, + TILEGX_OPC_PREFETCH_L2_FAULT, + BITFIELD(51, 2) /* index 56 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, CHILD(61), + BITFIELD(53, 2) /* index 61 */, + TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_LD2U, TILEGX_OPC_PREFETCH_L2, + BITFIELD(56, 2) /* index 66 */, + CHILD(71), CHILD(74), CHILD(90), CHILD(93), + BITFIELD(26, 1) /* index 71 */, + TILEGX_OPC_NONE, TILEGX_OPC_LD4S, + BITFIELD(26, 1) /* index 74 */, + TILEGX_OPC_NONE, CHILD(77), + BITFIELD(51, 2) /* index 77 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(82), + BITFIELD(53, 2) /* index 82 */, + TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, TILEGX_OPC_LD4S, CHILD(87), + BITFIELD(55, 1) /* index 87 */, + TILEGX_OPC_LD4S, TILEGX_OPC_PREFETCH_L3_FAULT, + BITFIELD(26, 1) /* index 90 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD, + BITFIELD(26, 1) /* index 93 */, + CHILD(96), TILEGX_OPC_LD, + BITFIELD(51, 2) /* index 96 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(101), + BITFIELD(53, 2) /* index 101 */, + TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, TILEGX_OPC_LD4U, CHILD(106), + BITFIELD(55, 1) /* index 106 */, + TILEGX_OPC_LD4U, TILEGX_OPC_PREFETCH_L3, + BITFIELD(26, 1) /* index 109 */, + CHILD(112), CHILD(115), + BITFIELD(57, 1) /* index 112 */, + TILEGX_OPC_ST1, TILEGX_OPC_ST4, + BITFIELD(57, 1) /* index 115 */, + TILEGX_OPC_ST2, TILEGX_OPC_ST, +}; + +#undef BITFIELD +#undef CHILD + +const unsigned short * const +tilegx_bundle_decoder_fsms[TILEGX_NUM_PIPELINE_ENCODINGS] = +{ + decode_X0_fsm, + decode_X1_fsm, + decode_Y0_fsm, + decode_Y1_fsm, + decode_Y2_fsm +}; + +const struct tilegx_operand tilegx_operands[35] = +{ + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X0), + 8, 1, 0, 0, 0, 0, + create_Imm8_X0, get_Imm8_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Imm8_X1, get_Imm8_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y0), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y0, get_Imm8_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM8_Y1), + 8, 1, 0, 0, 0, 0, + create_Imm8_Y1, get_Imm8_Y1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X0_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X0, get_Imm16_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_IMM16_X1_HW0_LAST), + 16, 1, 0, 0, 0, 0, + create_Imm16_X1, get_Imm16_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X1, get_Dest_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_X0, get_SrcA_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y0, get_SrcA_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_Dest_Y1, get_Dest_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y1, get_SrcA_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcA_Y2, get_SrcA_Y2 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_SrcA_X1, get_SrcA_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X0, get_SrcB_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_X1, get_SrcB_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y0, get_SrcB_Y0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcB_Y1, get_SrcB_Y1 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_BROFF_X1), + 17, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_BrOff_X1, get_BrOff_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMSTART_X0), + 6, 0, 0, 0, 0, 0, + create_BFStart_X0, get_BFStart_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_MMEND_X0), + 6, 0, 0, 0, 0, 0, + create_BFEnd_X0, get_BFEnd_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_X0, get_Dest_X0 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 1, 0, 0, + create_Dest_Y0, get_Dest_Y0 + }, + { + TILEGX_OP_TYPE_ADDRESS, BFD_RELOC(TILEGX_JUMPOFF_X1), + 27, 1, 0, 0, 1, TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES, + create_JumpOff_X1, get_JumpOff_X1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 0, 1, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MF_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MF_Imm14_X1, get_MF_Imm14_X1 + }, + { + TILEGX_OP_TYPE_SPR, BFD_RELOC(TILEGX_MT_IMM14_X1), + 14, 0, 0, 0, 0, 0, + create_MT_Imm14_X1, get_MT_Imm14_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X0, get_ShAmt_X0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_X1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_X1, get_ShAmt_X1 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y0), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y0, get_ShAmt_Y0 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_SHAMT_Y1), + 6, 0, 0, 0, 0, 0, + create_ShAmt_Y1, get_ShAmt_Y1 + }, + { + TILEGX_OP_TYPE_REGISTER, BFD_RELOC(NONE), + 6, 0, 1, 0, 0, 0, + create_SrcBDest_Y2, get_SrcBDest_Y2 + }, + { + TILEGX_OP_TYPE_IMMEDIATE, BFD_RELOC(TILEGX_DEST_IMM8_X1), + 8, 1, 0, 0, 0, 0, + create_Dest_Imm8_X1, get_Dest_Imm8_X1 + } +}; + +/* Given a set of bundle bits and a specific pipe, returns which + * instruction the bundle contains in that pipe. + */ +const struct tilegx_opcode * +find_opcode(tilegx_bundle_bits bits, tilegx_pipeline pipe) +{ + const unsigned short *table = tilegx_bundle_decoder_fsms[pipe]; + int index = 0; + + while (1) + { + unsigned short bitspec = table[index]; + unsigned int bitfield = + ((unsigned int)(bits >> (bitspec & 63))) & (bitspec >> 6); + + unsigned short next = table[index + 1 + bitfield]; + if (next <= TILEGX_OPC_NONE) + return &tilegx_opcodes[next]; + + index = next - TILEGX_OPC_NONE; + } +} + +int +parse_insn_tilegx(tilegx_bundle_bits bits, + unsigned long long pc, + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]) +{ + int num_instructions = 0; + int pipe; + + int min_pipe, max_pipe; + if ((bits & TILEGX_BUNDLE_MODE_MASK) == 0) + { + min_pipe = TILEGX_PIPELINE_X0; + max_pipe = TILEGX_PIPELINE_X1; + } + else + { + min_pipe = TILEGX_PIPELINE_Y0; + max_pipe = TILEGX_PIPELINE_Y2; + } + + /* For each pipe, find an instruction that fits. */ + for (pipe = min_pipe; pipe <= max_pipe; pipe++) + { + const struct tilegx_opcode *opc; + struct tilegx_decoded_instruction *d; + int i; + + d = &decoded[num_instructions++]; + opc = find_opcode (bits, (tilegx_pipeline)pipe); + d->opcode = opc; + + /* Decode each operand, sign extending, etc. as appropriate. */ + for (i = 0; i < opc->num_operands; i++) + { + const struct tilegx_operand *op = + &tilegx_operands[opc->operands[pipe][i]]; + int raw_opval = op->extract (bits); + long long opval; + + if (op->is_signed) + { + /* Sign-extend the operand. */ + int shift = (int)((sizeof(int) * 8) - op->num_bits); + raw_opval = (raw_opval << shift) >> shift; + } + + /* Adjust PC-relative scaled branch offsets. */ + if (op->type == TILEGX_OP_TYPE_ADDRESS) + opval = (raw_opval * TILEGX_BUNDLE_SIZE_IN_BYTES) + pc; + else + opval = raw_opval; + + /* Record the final value. */ + d->operands[i] = op; + d->operand_values[i] = opval; + } + } + + return num_instructions; +} + +struct tilegx_spr +{ + /* The number */ + int number; + + /* The name */ + const char *name; +}; + +static int +tilegx_spr_compare (const void *a_ptr, const void *b_ptr) +{ + const struct tilegx_spr *a = (const struct tilegx_spr *) a_ptr; + const struct tilegx_spr *b = (const struct tilegx_spr *) b_ptr; + return (a->number - b->number); +} + +const struct tilegx_spr tilegx_sprs[] = { + { 0, "MPL_MEM_ERROR_SET_0" }, + { 1, "MPL_MEM_ERROR_SET_1" }, + { 2, "MPL_MEM_ERROR_SET_2" }, + { 3, "MPL_MEM_ERROR_SET_3" }, + { 4, "MPL_MEM_ERROR" }, + { 5, "MEM_ERROR_CBOX_ADDR" }, + { 6, "MEM_ERROR_CBOX_STATUS" }, + { 7, "MEM_ERROR_ENABLE" }, + { 8, "MEM_ERROR_MBOX_ADDR" }, + { 9, "MEM_ERROR_MBOX_STATUS" }, + { 10, "SBOX_ERROR" }, + { 11, "XDN_DEMUX_ERROR" }, + { 256, "MPL_SINGLE_STEP_3_SET_0" }, + { 257, "MPL_SINGLE_STEP_3_SET_1" }, + { 258, "MPL_SINGLE_STEP_3_SET_2" }, + { 259, "MPL_SINGLE_STEP_3_SET_3" }, + { 260, "MPL_SINGLE_STEP_3" }, + { 261, "SINGLE_STEP_CONTROL_3" }, + { 512, "MPL_SINGLE_STEP_2_SET_0" }, + { 513, "MPL_SINGLE_STEP_2_SET_1" }, + { 514, "MPL_SINGLE_STEP_2_SET_2" }, + { 515, "MPL_SINGLE_STEP_2_SET_3" }, + { 516, "MPL_SINGLE_STEP_2" }, + { 517, "SINGLE_STEP_CONTROL_2" }, + { 768, "MPL_SINGLE_STEP_1_SET_0" }, + { 769, "MPL_SINGLE_STEP_1_SET_1" }, + { 770, "MPL_SINGLE_STEP_1_SET_2" }, + { 771, "MPL_SINGLE_STEP_1_SET_3" }, + { 772, "MPL_SINGLE_STEP_1" }, + { 773, "SINGLE_STEP_CONTROL_1" }, + { 1024, "MPL_SINGLE_STEP_0_SET_0" }, + { 1025, "MPL_SINGLE_STEP_0_SET_1" }, + { 1026, "MPL_SINGLE_STEP_0_SET_2" }, + { 1027, "MPL_SINGLE_STEP_0_SET_3" }, + { 1028, "MPL_SINGLE_STEP_0" }, + { 1029, "SINGLE_STEP_CONTROL_0" }, + { 1280, "MPL_IDN_COMPLETE_SET_0" }, + { 1281, "MPL_IDN_COMPLETE_SET_1" }, + { 1282, "MPL_IDN_COMPLETE_SET_2" }, + { 1283, "MPL_IDN_COMPLETE_SET_3" }, + { 1284, "MPL_IDN_COMPLETE" }, + { 1285, "IDN_COMPLETE_PENDING" }, + { 1536, "MPL_UDN_COMPLETE_SET_0" }, + { 1537, "MPL_UDN_COMPLETE_SET_1" }, + { 1538, "MPL_UDN_COMPLETE_SET_2" }, + { 1539, "MPL_UDN_COMPLETE_SET_3" }, + { 1540, "MPL_UDN_COMPLETE" }, + { 1541, "UDN_COMPLETE_PENDING" }, + { 1792, "MPL_ITLB_MISS_SET_0" }, + { 1793, "MPL_ITLB_MISS_SET_1" }, + { 1794, "MPL_ITLB_MISS_SET_2" }, + { 1795, "MPL_ITLB_MISS_SET_3" }, + { 1796, "MPL_ITLB_MISS" }, + { 1797, "ITLB_TSB_BASE_ADDR_0" }, + { 1798, "ITLB_TSB_BASE_ADDR_1" }, + { 1920, "ITLB_CURRENT_ATTR" }, + { 1921, "ITLB_CURRENT_PA" }, + { 1922, "ITLB_CURRENT_VA" }, + { 1923, "ITLB_INDEX" }, + { 1924, "ITLB_MATCH_0" }, + { 1925, "ITLB_PERF" }, + { 1926, "ITLB_PR" }, + { 1927, "ITLB_TSB_ADDR_0" }, + { 1928, "ITLB_TSB_ADDR_1" }, + { 1929, "ITLB_TSB_FILL_CURRENT_ATTR" }, + { 1930, "ITLB_TSB_FILL_MATCH" }, + { 1931, "NUMBER_ITLB" }, + { 1932, "REPLACEMENT_ITLB" }, + { 1933, "WIRED_ITLB" }, + { 2048, "MPL_ILL_SET_0" }, + { 2049, "MPL_ILL_SET_1" }, + { 2050, "MPL_ILL_SET_2" }, + { 2051, "MPL_ILL_SET_3" }, + { 2052, "MPL_ILL" }, + { 2304, "MPL_GPV_SET_0" }, + { 2305, "MPL_GPV_SET_1" }, + { 2306, "MPL_GPV_SET_2" }, + { 2307, "MPL_GPV_SET_3" }, + { 2308, "MPL_GPV" }, + { 2309, "GPV_REASON" }, + { 2560, "MPL_IDN_ACCESS_SET_0" }, + { 2561, "MPL_IDN_ACCESS_SET_1" }, + { 2562, "MPL_IDN_ACCESS_SET_2" }, + { 2563, "MPL_IDN_ACCESS_SET_3" }, + { 2564, "MPL_IDN_ACCESS" }, + { 2565, "IDN_DEMUX_COUNT_0" }, + { 2566, "IDN_DEMUX_COUNT_1" }, + { 2567, "IDN_FLUSH_EGRESS" }, + { 2568, "IDN_PENDING" }, + { 2569, "IDN_ROUTE_ORDER" }, + { 2570, "IDN_SP_FIFO_CNT" }, + { 2688, "IDN_DATA_AVAIL" }, + { 2816, "MPL_UDN_ACCESS_SET_0" }, + { 2817, "MPL_UDN_ACCESS_SET_1" }, + { 2818, "MPL_UDN_ACCESS_SET_2" }, + { 2819, "MPL_UDN_ACCESS_SET_3" }, + { 2820, "MPL_UDN_ACCESS" }, + { 2821, "UDN_DEMUX_COUNT_0" }, + { 2822, "UDN_DEMUX_COUNT_1" }, + { 2823, "UDN_DEMUX_COUNT_2" }, + { 2824, "UDN_DEMUX_COUNT_3" }, + { 2825, "UDN_FLUSH_EGRESS" }, + { 2826, "UDN_PENDING" }, + { 2827, "UDN_ROUTE_ORDER" }, + { 2828, "UDN_SP_FIFO_CNT" }, + { 2944, "UDN_DATA_AVAIL" }, + { 3072, "MPL_SWINT_3_SET_0" }, + { 3073, "MPL_SWINT_3_SET_1" }, + { 3074, "MPL_SWINT_3_SET_2" }, + { 3075, "MPL_SWINT_3_SET_3" }, + { 3076, "MPL_SWINT_3" }, + { 3328, "MPL_SWINT_2_SET_0" }, + { 3329, "MPL_SWINT_2_SET_1" }, + { 3330, "MPL_SWINT_2_SET_2" }, + { 3331, "MPL_SWINT_2_SET_3" }, + { 3332, "MPL_SWINT_2" }, + { 3584, "MPL_SWINT_1_SET_0" }, + { 3585, "MPL_SWINT_1_SET_1" }, + { 3586, "MPL_SWINT_1_SET_2" }, + { 3587, "MPL_SWINT_1_SET_3" }, + { 3588, "MPL_SWINT_1" }, + { 3840, "MPL_SWINT_0_SET_0" }, + { 3841, "MPL_SWINT_0_SET_1" }, + { 3842, "MPL_SWINT_0_SET_2" }, + { 3843, "MPL_SWINT_0_SET_3" }, + { 3844, "MPL_SWINT_0" }, + { 4096, "MPL_ILL_TRANS_SET_0" }, + { 4097, "MPL_ILL_TRANS_SET_1" }, + { 4098, "MPL_ILL_TRANS_SET_2" }, + { 4099, "MPL_ILL_TRANS_SET_3" }, + { 4100, "MPL_ILL_TRANS" }, + { 4101, "ILL_TRANS_REASON" }, + { 4102, "ILL_VA_PC" }, + { 4352, "MPL_UNALIGN_DATA_SET_0" }, + { 4353, "MPL_UNALIGN_DATA_SET_1" }, + { 4354, "MPL_UNALIGN_DATA_SET_2" }, + { 4355, "MPL_UNALIGN_DATA_SET_3" }, + { 4356, "MPL_UNALIGN_DATA" }, + { 4608, "MPL_DTLB_MISS_SET_0" }, + { 4609, "MPL_DTLB_MISS_SET_1" }, + { 4610, "MPL_DTLB_MISS_SET_2" }, + { 4611, "MPL_DTLB_MISS_SET_3" }, + { 4612, "MPL_DTLB_MISS" }, + { 4613, "DTLB_TSB_BASE_ADDR_0" }, + { 4614, "DTLB_TSB_BASE_ADDR_1" }, + { 4736, "AAR" }, + { 4737, "CACHE_PINNED_WAYS" }, + { 4738, "DTLB_BAD_ADDR" }, + { 4739, "DTLB_BAD_ADDR_REASON" }, + { 4740, "DTLB_CURRENT_ATTR" }, + { 4741, "DTLB_CURRENT_PA" }, + { 4742, "DTLB_CURRENT_VA" }, + { 4743, "DTLB_INDEX" }, + { 4744, "DTLB_MATCH_0" }, + { 4745, "DTLB_PERF" }, + { 4746, "DTLB_TSB_ADDR_0" }, + { 4747, "DTLB_TSB_ADDR_1" }, + { 4748, "DTLB_TSB_FILL_CURRENT_ATTR" }, + { 4749, "DTLB_TSB_FILL_MATCH" }, + { 4750, "NUMBER_DTLB" }, + { 4751, "REPLACEMENT_DTLB" }, + { 4752, "WIRED_DTLB" }, + { 4864, "MPL_DTLB_ACCESS_SET_0" }, + { 4865, "MPL_DTLB_ACCESS_SET_1" }, + { 4866, "MPL_DTLB_ACCESS_SET_2" }, + { 4867, "MPL_DTLB_ACCESS_SET_3" }, + { 4868, "MPL_DTLB_ACCESS" }, + { 5120, "MPL_IDN_FIREWALL_SET_0" }, + { 5121, "MPL_IDN_FIREWALL_SET_1" }, + { 5122, "MPL_IDN_FIREWALL_SET_2" }, + { 5123, "MPL_IDN_FIREWALL_SET_3" }, + { 5124, "MPL_IDN_FIREWALL" }, + { 5125, "IDN_DIRECTION_PROTECT" }, + { 5376, "MPL_UDN_FIREWALL_SET_0" }, + { 5377, "MPL_UDN_FIREWALL_SET_1" }, + { 5378, "MPL_UDN_FIREWALL_SET_2" }, + { 5379, "MPL_UDN_FIREWALL_SET_3" }, + { 5380, "MPL_UDN_FIREWALL" }, + { 5381, "UDN_DIRECTION_PROTECT" }, + { 5632, "MPL_TILE_TIMER_SET_0" }, + { 5633, "MPL_TILE_TIMER_SET_1" }, + { 5634, "MPL_TILE_TIMER_SET_2" }, + { 5635, "MPL_TILE_TIMER_SET_3" }, + { 5636, "MPL_TILE_TIMER" }, + { 5637, "TILE_TIMER_CONTROL" }, + { 5888, "MPL_AUX_TILE_TIMER_SET_0" }, + { 5889, "MPL_AUX_TILE_TIMER_SET_1" }, + { 5890, "MPL_AUX_TILE_TIMER_SET_2" }, + { 5891, "MPL_AUX_TILE_TIMER_SET_3" }, + { 5892, "MPL_AUX_TILE_TIMER" }, + { 5893, "AUX_TILE_TIMER_CONTROL" }, + { 6144, "MPL_IDN_TIMER_SET_0" }, + { 6145, "MPL_IDN_TIMER_SET_1" }, + { 6146, "MPL_IDN_TIMER_SET_2" }, + { 6147, "MPL_IDN_TIMER_SET_3" }, + { 6148, "MPL_IDN_TIMER" }, + { 6149, "IDN_DEADLOCK_COUNT" }, + { 6150, "IDN_DEADLOCK_TIMEOUT" }, + { 6400, "MPL_UDN_TIMER_SET_0" }, + { 6401, "MPL_UDN_TIMER_SET_1" }, + { 6402, "MPL_UDN_TIMER_SET_2" }, + { 6403, "MPL_UDN_TIMER_SET_3" }, + { 6404, "MPL_UDN_TIMER" }, + { 6405, "UDN_DEADLOCK_COUNT" }, + { 6406, "UDN_DEADLOCK_TIMEOUT" }, + { 6656, "MPL_IDN_AVAIL_SET_0" }, + { 6657, "MPL_IDN_AVAIL_SET_1" }, + { 6658, "MPL_IDN_AVAIL_SET_2" }, + { 6659, "MPL_IDN_AVAIL_SET_3" }, + { 6660, "MPL_IDN_AVAIL" }, + { 6661, "IDN_AVAIL_EN" }, + { 6912, "MPL_UDN_AVAIL_SET_0" }, + { 6913, "MPL_UDN_AVAIL_SET_1" }, + { 6914, "MPL_UDN_AVAIL_SET_2" }, + { 6915, "MPL_UDN_AVAIL_SET_3" }, + { 6916, "MPL_UDN_AVAIL" }, + { 6917, "UDN_AVAIL_EN" }, + { 7168, "MPL_IPI_3_SET_0" }, + { 7169, "MPL_IPI_3_SET_1" }, + { 7170, "MPL_IPI_3_SET_2" }, + { 7171, "MPL_IPI_3_SET_3" }, + { 7172, "MPL_IPI_3" }, + { 7173, "IPI_EVENT_3" }, + { 7174, "IPI_EVENT_RESET_3" }, + { 7175, "IPI_EVENT_SET_3" }, + { 7176, "IPI_MASK_3" }, + { 7177, "IPI_MASK_RESET_3" }, + { 7178, "IPI_MASK_SET_3" }, + { 7424, "MPL_IPI_2_SET_0" }, + { 7425, "MPL_IPI_2_SET_1" }, + { 7426, "MPL_IPI_2_SET_2" }, + { 7427, "MPL_IPI_2_SET_3" }, + { 7428, "MPL_IPI_2" }, + { 7429, "IPI_EVENT_2" }, + { 7430, "IPI_EVENT_RESET_2" }, + { 7431, "IPI_EVENT_SET_2" }, + { 7432, "IPI_MASK_2" }, + { 7433, "IPI_MASK_RESET_2" }, + { 7434, "IPI_MASK_SET_2" }, + { 7680, "MPL_IPI_1_SET_0" }, + { 7681, "MPL_IPI_1_SET_1" }, + { 7682, "MPL_IPI_1_SET_2" }, + { 7683, "MPL_IPI_1_SET_3" }, + { 7684, "MPL_IPI_1" }, + { 7685, "IPI_EVENT_1" }, + { 7686, "IPI_EVENT_RESET_1" }, + { 7687, "IPI_EVENT_SET_1" }, + { 7688, "IPI_MASK_1" }, + { 7689, "IPI_MASK_RESET_1" }, + { 7690, "IPI_MASK_SET_1" }, + { 7936, "MPL_IPI_0_SET_0" }, + { 7937, "MPL_IPI_0_SET_1" }, + { 7938, "MPL_IPI_0_SET_2" }, + { 7939, "MPL_IPI_0_SET_3" }, + { 7940, "MPL_IPI_0" }, + { 7941, "IPI_EVENT_0" }, + { 7942, "IPI_EVENT_RESET_0" }, + { 7943, "IPI_EVENT_SET_0" }, + { 7944, "IPI_MASK_0" }, + { 7945, "IPI_MASK_RESET_0" }, + { 7946, "IPI_MASK_SET_0" }, + { 8192, "MPL_PERF_COUNT_SET_0" }, + { 8193, "MPL_PERF_COUNT_SET_1" }, + { 8194, "MPL_PERF_COUNT_SET_2" }, + { 8195, "MPL_PERF_COUNT_SET_3" }, + { 8196, "MPL_PERF_COUNT" }, + { 8197, "PERF_COUNT_0" }, + { 8198, "PERF_COUNT_1" }, + { 8199, "PERF_COUNT_CTL" }, + { 8200, "PERF_COUNT_DN_CTL" }, + { 8201, "PERF_COUNT_STS" }, + { 8202, "WATCH_MASK" }, + { 8203, "WATCH_VAL" }, + { 8448, "MPL_AUX_PERF_COUNT_SET_0" }, + { 8449, "MPL_AUX_PERF_COUNT_SET_1" }, + { 8450, "MPL_AUX_PERF_COUNT_SET_2" }, + { 8451, "MPL_AUX_PERF_COUNT_SET_3" }, + { 8452, "MPL_AUX_PERF_COUNT" }, + { 8453, "AUX_PERF_COUNT_0" }, + { 8454, "AUX_PERF_COUNT_1" }, + { 8455, "AUX_PERF_COUNT_CTL" }, + { 8456, "AUX_PERF_COUNT_STS" }, + { 8704, "MPL_INTCTRL_3_SET_0" }, + { 8705, "MPL_INTCTRL_3_SET_1" }, + { 8706, "MPL_INTCTRL_3_SET_2" }, + { 8707, "MPL_INTCTRL_3_SET_3" }, + { 8708, "MPL_INTCTRL_3" }, + { 8709, "INTCTRL_3_STATUS" }, + { 8710, "INTERRUPT_MASK_3" }, + { 8711, "INTERRUPT_MASK_RESET_3" }, + { 8712, "INTERRUPT_MASK_SET_3" }, + { 8713, "INTERRUPT_VECTOR_BASE_3" }, + { 8714, "SINGLE_STEP_EN_0_3" }, + { 8715, "SINGLE_STEP_EN_1_3" }, + { 8716, "SINGLE_STEP_EN_2_3" }, + { 8717, "SINGLE_STEP_EN_3_3" }, + { 8832, "EX_CONTEXT_3_0" }, + { 8833, "EX_CONTEXT_3_1" }, + { 8834, "SYSTEM_SAVE_3_0" }, + { 8835, "SYSTEM_SAVE_3_1" }, + { 8836, "SYSTEM_SAVE_3_2" }, + { 8837, "SYSTEM_SAVE_3_3" }, + { 8960, "MPL_INTCTRL_2_SET_0" }, + { 8961, "MPL_INTCTRL_2_SET_1" }, + { 8962, "MPL_INTCTRL_2_SET_2" }, + { 8963, "MPL_INTCTRL_2_SET_3" }, + { 8964, "MPL_INTCTRL_2" }, + { 8965, "INTCTRL_2_STATUS" }, + { 8966, "INTERRUPT_MASK_2" }, + { 8967, "INTERRUPT_MASK_RESET_2" }, + { 8968, "INTERRUPT_MASK_SET_2" }, + { 8969, "INTERRUPT_VECTOR_BASE_2" }, + { 8970, "SINGLE_STEP_EN_0_2" }, + { 8971, "SINGLE_STEP_EN_1_2" }, + { 8972, "SINGLE_STEP_EN_2_2" }, + { 8973, "SINGLE_STEP_EN_3_2" }, + { 9088, "EX_CONTEXT_2_0" }, + { 9089, "EX_CONTEXT_2_1" }, + { 9090, "SYSTEM_SAVE_2_0" }, + { 9091, "SYSTEM_SAVE_2_1" }, + { 9092, "SYSTEM_SAVE_2_2" }, + { 9093, "SYSTEM_SAVE_2_3" }, + { 9216, "MPL_INTCTRL_1_SET_0" }, + { 9217, "MPL_INTCTRL_1_SET_1" }, + { 9218, "MPL_INTCTRL_1_SET_2" }, + { 9219, "MPL_INTCTRL_1_SET_3" }, + { 9220, "MPL_INTCTRL_1" }, + { 9221, "INTCTRL_1_STATUS" }, + { 9222, "INTERRUPT_MASK_1" }, + { 9223, "INTERRUPT_MASK_RESET_1" }, + { 9224, "INTERRUPT_MASK_SET_1" }, + { 9225, "INTERRUPT_VECTOR_BASE_1" }, + { 9226, "SINGLE_STEP_EN_0_1" }, + { 9227, "SINGLE_STEP_EN_1_1" }, + { 9228, "SINGLE_STEP_EN_2_1" }, + { 9229, "SINGLE_STEP_EN_3_1" }, + { 9344, "EX_CONTEXT_1_0" }, + { 9345, "EX_CONTEXT_1_1" }, + { 9346, "SYSTEM_SAVE_1_0" }, + { 9347, "SYSTEM_SAVE_1_1" }, + { 9348, "SYSTEM_SAVE_1_2" }, + { 9349, "SYSTEM_SAVE_1_3" }, + { 9472, "MPL_INTCTRL_0_SET_0" }, + { 9473, "MPL_INTCTRL_0_SET_1" }, + { 9474, "MPL_INTCTRL_0_SET_2" }, + { 9475, "MPL_INTCTRL_0_SET_3" }, + { 9476, "MPL_INTCTRL_0" }, + { 9477, "INTCTRL_0_STATUS" }, + { 9478, "INTERRUPT_MASK_0" }, + { 9479, "INTERRUPT_MASK_RESET_0" }, + { 9480, "INTERRUPT_MASK_SET_0" }, + { 9481, "INTERRUPT_VECTOR_BASE_0" }, + { 9482, "SINGLE_STEP_EN_0_0" }, + { 9483, "SINGLE_STEP_EN_1_0" }, + { 9484, "SINGLE_STEP_EN_2_0" }, + { 9485, "SINGLE_STEP_EN_3_0" }, + { 9600, "EX_CONTEXT_0_0" }, + { 9601, "EX_CONTEXT_0_1" }, + { 9602, "SYSTEM_SAVE_0_0" }, + { 9603, "SYSTEM_SAVE_0_1" }, + { 9604, "SYSTEM_SAVE_0_2" }, + { 9605, "SYSTEM_SAVE_0_3" }, + { 9728, "MPL_BOOT_ACCESS_SET_0" }, + { 9729, "MPL_BOOT_ACCESS_SET_1" }, + { 9730, "MPL_BOOT_ACCESS_SET_2" }, + { 9731, "MPL_BOOT_ACCESS_SET_3" }, + { 9732, "MPL_BOOT_ACCESS" }, + { 9733, "BIG_ENDIAN_CONFIG" }, + { 9734, "CACHE_INVALIDATION_COMPRESSION_MODE" }, + { 9735, "CACHE_INVALIDATION_MASK_0" }, + { 9736, "CACHE_INVALIDATION_MASK_1" }, + { 9737, "CACHE_INVALIDATION_MASK_2" }, + { 9738, "CBOX_CACHEASRAM_CONFIG" }, + { 9739, "CBOX_CACHE_CONFIG" }, + { 9740, "CBOX_HOME_MAP_ADDR" }, + { 9741, "CBOX_HOME_MAP_DATA" }, + { 9742, "CBOX_MMAP_0" }, + { 9743, "CBOX_MMAP_1" }, + { 9744, "CBOX_MMAP_2" }, + { 9745, "CBOX_MMAP_3" }, + { 9746, "CBOX_MSR" }, + { 9747, "DIAG_BCST_CTL" }, + { 9748, "DIAG_BCST_MASK" }, + { 9749, "DIAG_BCST_TRIGGER" }, + { 9750, "DIAG_MUX_CTL" }, + { 9751, "DIAG_TRACE_CTL" }, + { 9752, "DIAG_TRACE_DATA" }, + { 9753, "DIAG_TRACE_STS" }, + { 9754, "IDN_DEMUX_BUF_THRESH" }, + { 9755, "L1_I_PIN_WAY_0" }, + { 9756, "MEM_ROUTE_ORDER" }, + { 9757, "MEM_STRIPE_CONFIG" }, + { 9758, "PERF_COUNT_PLS" }, + { 9759, "PSEUDO_RANDOM_NUMBER_MODIFY" }, + { 9760, "QUIESCE_CTL" }, + { 9761, "RSHIM_COORD" }, + { 9762, "SBOX_CONFIG" }, + { 9763, "UDN_DEMUX_BUF_THRESH" }, + { 9764, "XDN_CORE_STARVATION_COUNT" }, + { 9765, "XDN_ROUND_ROBIN_ARB_CTL" }, + { 9856, "CYCLE_MODIFY" }, + { 9857, "I_AAR" }, + { 9984, "MPL_WORLD_ACCESS_SET_0" }, + { 9985, "MPL_WORLD_ACCESS_SET_1" }, + { 9986, "MPL_WORLD_ACCESS_SET_2" }, + { 9987, "MPL_WORLD_ACCESS_SET_3" }, + { 9988, "MPL_WORLD_ACCESS" }, + { 9989, "DONE" }, + { 9990, "DSTREAM_PF" }, + { 9991, "FAIL" }, + { 9992, "INTERRUPT_CRITICAL_SECTION" }, + { 9993, "PASS" }, + { 9994, "PSEUDO_RANDOM_NUMBER" }, + { 9995, "TILE_COORD" }, + { 9996, "TILE_RTF_HWM" }, + { 10112, "CMPEXCH_VALUE" }, + { 10113, "CYCLE" }, + { 10114, "EVENT_BEGIN" }, + { 10115, "EVENT_END" }, + { 10116, "PROC_STATUS" }, + { 10117, "SIM_CONTROL" }, + { 10118, "SIM_SOCKET" }, + { 10119, "STATUS_SATURATE" }, + { 10240, "MPL_I_ASID_SET_0" }, + { 10241, "MPL_I_ASID_SET_1" }, + { 10242, "MPL_I_ASID_SET_2" }, + { 10243, "MPL_I_ASID_SET_3" }, + { 10244, "MPL_I_ASID" }, + { 10245, "I_ASID" }, + { 10496, "MPL_D_ASID_SET_0" }, + { 10497, "MPL_D_ASID_SET_1" }, + { 10498, "MPL_D_ASID_SET_2" }, + { 10499, "MPL_D_ASID_SET_3" }, + { 10500, "MPL_D_ASID" }, + { 10501, "D_ASID" }, + { 10752, "MPL_DOUBLE_FAULT_SET_0" }, + { 10753, "MPL_DOUBLE_FAULT_SET_1" }, + { 10754, "MPL_DOUBLE_FAULT_SET_2" }, + { 10755, "MPL_DOUBLE_FAULT_SET_3" }, + { 10756, "MPL_DOUBLE_FAULT" }, + { 10757, "LAST_INTERRUPT_REASON" }, +}; + +const int tilegx_num_sprs = 441; + +const char * +get_tilegx_spr_name (int num) +{ + void *result; + struct tilegx_spr key; + + key.number = num; + result = bsearch((const void *) &key, (const void *) tilegx_sprs, + tilegx_num_sprs, sizeof (struct tilegx_spr), + tilegx_spr_compare); + + if (result == NULL) + { + return (NULL); + } + else + { + struct tilegx_spr *result_ptr = (struct tilegx_spr *) result; + return (result_ptr->name); + } +} + +int +print_insn_tilegx (unsigned char * memaddr) +{ + struct tilegx_decoded_instruction + decoded[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; + unsigned char opbuf[TILEGX_BUNDLE_SIZE_IN_BYTES]; + int i, num_instructions, num_printed; + tilegx_mnemonic padding_mnemonic; + + memcpy((void *)opbuf, (void *)memaddr, TILEGX_BUNDLE_SIZE_IN_BYTES); + + /* Parse the instructions in the bundle. */ + num_instructions = + parse_insn_tilegx (*(unsigned long long *)opbuf, (unsigned long long)memaddr, decoded); + + /* Print the instructions in the bundle. */ + printf("{ "); + num_printed = 0; + + /* Determine which nop opcode is used for padding and should be skipped. */ + padding_mnemonic = TILEGX_OPC_FNOP; + for (i = 0; i < num_instructions; i++) + { + if (!decoded[i].opcode->can_bundle) + { + /* Instructions that cannot be bundled are padded out with nops, + rather than fnops. Displaying them is always clutter. */ + padding_mnemonic = TILEGX_OPC_NOP; + break; + } + } + + for (i = 0; i < num_instructions; i++) + { + const struct tilegx_opcode *opcode = decoded[i].opcode; + const char *name; + int j; + + /* Do not print out fnops, unless everything is an fnop, in + which case we will print out just the last one. */ + if (opcode->mnemonic == padding_mnemonic + && (num_printed > 0 || i + 1 < num_instructions)) + continue; + + if (num_printed > 0) + printf(" ; "); + ++num_printed; + + name = opcode->name; + if (name == NULL) + name = ""; + printf("%s", name); + + for (j = 0; j < opcode->num_operands; j++) + { + unsigned long long num; + const struct tilegx_operand *op; + const char *spr_name; + + if (j > 0) + printf (","); + printf (" "); + + num = decoded[i].operand_values[j]; + + op = decoded[i].operands[j]; + switch (op->type) + { + case TILEGX_OP_TYPE_REGISTER: + printf ("%s", tilegx_register_names[(int)num]); + break; + case TILEGX_OP_TYPE_SPR: + spr_name = get_tilegx_spr_name(num); + if (spr_name != NULL) + printf ("%s", spr_name); + else + printf ("%d", (int)num); + break; + case TILEGX_OP_TYPE_IMMEDIATE: + printf ("%d", (int)num); + break; + case TILEGX_OP_TYPE_ADDRESS: + printf ("0x%016llx", num); + break; + default: + abort (); + } + } + } + printf (" }\n"); + + return TILEGX_BUNDLE_SIZE_IN_BYTES; +} diff --git a/src/sljit/sljitNativeTILEGX_64.c b/src/sljit/sljitNativeTILEGX_64.c new file mode 100644 index 0000000..1d6aa5a --- /dev/null +++ b/src/sljit/sljitNativeTILEGX_64.c @@ -0,0 +1,2574 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* TileGX architecture. */ +/* Contributed by Tilera Corporation. */ +#include "sljitNativeTILEGX-encoder.c" + +#define SIMM_8BIT_MAX (0x7f) +#define SIMM_8BIT_MIN (-0x80) +#define SIMM_16BIT_MAX (0x7fff) +#define SIMM_16BIT_MIN (-0x8000) +#define SIMM_17BIT_MAX (0xffff) +#define SIMM_17BIT_MIN (-0x10000) +#define SIMM_32BIT_MIN (-0x80000000) +#define SIMM_32BIT_MAX (0x7fffffff) +#define SIMM_48BIT_MIN (0x800000000000L) +#define SIMM_48BIT_MAX (0x7fffffff0000L) +#define IMM16(imm) ((imm) & 0xffff) + +#define UIMM_16BIT_MAX (0xffff) + +#define TMP_REG1 (SLJIT_NO_REGISTERS + 1) +#define TMP_REG2 (SLJIT_NO_REGISTERS + 2) +#define TMP_REG3 (SLJIT_NO_REGISTERS + 3) +#define ADDR_TMP (SLJIT_NO_REGISTERS + 4) +#define PIC_ADDR_REG TMP_REG2 + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { + 63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7 +}; + +#define SLJIT_LOCALS_REG_mapped 54 +#define TMP_REG1_mapped 5 +#define TMP_REG2_mapped 16 +#define TMP_REG3_mapped 6 +#define ADDR_TMP_mapped 7 +#define SLJIT_SAVED_REG1_mapped 30 +#define SLJIT_SAVED_REG2_mapped 31 +#define SLJIT_SAVED_REG3_mapped 32 +#define SLJIT_SAVED_EREG1_mapped 33 +#define SLJIT_SAVED_EREG2_mapped 34 + +/* Flags are keept in volatile registers. */ +#define EQUAL_FLAG 8 +/* And carry flag as well. */ +#define ULESS_FLAG 9 +#define UGREATER_FLAG 10 +#define LESS_FLAG 11 +#define GREATER_FLAG 12 +#define OVERFLOW_FLAG 13 + +#define ZERO 63 +#define RA 55 +#define TMP_EREG1 14 +#define TMP_EREG2 15 + +#define LOAD_DATA 0x01 +#define WORD_DATA 0x00 +#define BYTE_DATA 0x02 +#define HALF_DATA 0x04 +#define INT_DATA 0x06 +#define SIGNED_DATA 0x08 +#define DOUBLE_DATA 0x10 + +/* Separates integer and floating point registers */ +#define GPR_REG 0xf + +#define MEM_MASK 0x1f + +#define WRITE_BACK 0x00020 +#define ARG_TEST 0x00040 +#define ALT_KEEP_CACHE 0x00080 +#define CUMULATIVE_OP 0x00100 +#define LOGICAL_OP 0x00200 +#define IMM_OP 0x00400 +#define SRC2_IMM 0x00800 + +#define UNUSED_DEST 0x01000 +#define REG_DEST 0x02000 +#define REG1_SOURCE 0x04000 +#define REG2_SOURCE 0x08000 +#define SLOW_SRC1 0x10000 +#define SLOW_SRC2 0x20000 +#define SLOW_DEST 0x40000 + +/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. + */ +#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char *sljit_get_platform_name(void) +{ + return "TileGX" SLJIT_CPUINFO; +} + +/* Length of an instruction word */ +typedef sljit_uw sljit_ins; + +struct jit_instr { + const struct tilegx_opcode* opcode; + tilegx_pipeline pipe; + unsigned long input_registers; + unsigned long output_registers; + int operand_value[4]; + int line; +}; + +/* Opcode Helper Macros */ +#define TILEGX_X_MODE 0 + +#define X_MODE create_Mode(TILEGX_X_MODE) + +#define FNOP_X0 \ + create_Opcode_X0(RRR_0_OPCODE_X0) | \ + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ + create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) + +#define FNOP_X1 \ + create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ + create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1) + +#define NOP \ + create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1 + +#define ANOP_X0 \ + create_Opcode_X0(RRR_0_OPCODE_X0) | \ + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ + create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0) + +#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ + create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \ + create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0 + +#define ADD_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0 + +#define ADDI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ + create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0 + +#define SUB_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0 + +#define NOR_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0 + +#define OR_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0 + +#define AND_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0 + +#define XOR_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0 + +#define CMOVNEZ_X0 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ + create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1 + +#define CMOVEQZ_X0 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ + create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1 + +#define ADDLI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0 + +#define V4INT_L_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0 + +#define BFEXTU_X0 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ + create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1 + +#define BFEXTS_X0 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \ + create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1 + +#define SHL16INSLI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0 + +#define ST_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0 + +#define LD_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ + create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0 + +#define JR_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ + create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0 + +#define JALR_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \ + create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0 + +#define CLZ_X0 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \ + create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \ + create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1 + +#define CMPLTUI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ + create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0 + +#define CMPLTU_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0 + +#define CMPLTS_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0 + +#define XORI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ + create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0 + +#define ORI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ + create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0 + +#define ANDI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \ + create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0 + +#define SHLI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ + create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0 + +#define SHL_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0 + +#define SHRSI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ + create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0 + +#define SHRS_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0 + +#define SHRUI_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \ + create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0 + +#define SHRU_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \ + create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0 + +#define BEQZ_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ + create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0 + +#define BNEZ_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \ + create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0 + +#define J_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ + create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0 + +#define JAL_X1 \ + create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \ + create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0 + +#define DEST_X0(x) create_Dest_X0(x) +#define SRCA_X0(x) create_SrcA_X0(x) +#define SRCB_X0(x) create_SrcB_X0(x) +#define DEST_X1(x) create_Dest_X1(x) +#define SRCA_X1(x) create_SrcA_X1(x) +#define SRCB_X1(x) create_SrcB_X1(x) +#define IMM16_X1(x) create_Imm16_X1(x) +#define IMM8_X1(x) create_Imm8_X1(x) +#define BFSTART_X0(x) create_BFStart_X0(x) +#define BFEND_X0(x) create_BFEnd_X0(x) +#define SHIFTIMM_X1(x) create_ShAmt_X1(x) +#define JOFF_X1(x) create_JumpOff_X1(x) +#define BOFF_X1(x) create_BrOff_X1(x) + +static SLJIT_CONST tilegx_mnemonic data_transfer_insts[16] = { + /* u w s */ TILEGX_OPC_ST /* st */, + /* u w l */ TILEGX_OPC_LD /* ld */, + /* u b s */ TILEGX_OPC_ST1 /* st1 */, + /* u b l */ TILEGX_OPC_LD1U /* ld1u */, + /* u h s */ TILEGX_OPC_ST2 /* st2 */, + /* u h l */ TILEGX_OPC_LD2U /* ld2u */, + /* u i s */ TILEGX_OPC_ST4 /* st4 */, + /* u i l */ TILEGX_OPC_LD4U /* ld4u */, + /* s w s */ TILEGX_OPC_ST /* st */, + /* s w l */ TILEGX_OPC_LD /* ld */, + /* s b s */ TILEGX_OPC_ST1 /* st1 */, + /* s b l */ TILEGX_OPC_LD1S /* ld1s */, + /* s h s */ TILEGX_OPC_ST2 /* st2 */, + /* s h l */ TILEGX_OPC_LD2S /* ld2s */, + /* s i s */ TILEGX_OPC_ST4 /* st4 */, + /* s i l */ TILEGX_OPC_LD4S /* ld4s */, +}; + +#ifdef TILEGX_JIT_DEBUG +static sljit_si push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line) +{ + sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + printf("|%04d|S0|:\t\t", line); + print_insn_tilegx(ptr); + return SLJIT_SUCCESS; +} + +static sljit_si push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} + +#define push_inst(a, b) push_inst_debug(a, b, __LINE__) +#else +static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins) +{ + sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins)); + FAIL_IF(!ptr); + *ptr = ins; + compiler->size++; + return SLJIT_SUCCESS; +} +#endif + +#define BUNDLE_FORMAT_MASK(p0, p1, p2) \ + ((p0) | ((p1) << 8) | ((p2) << 16)) + +#define BUNDLE_FORMAT(p0, p1, p2) \ + { \ + { \ + (tilegx_pipeline)(p0), \ + (tilegx_pipeline)(p1), \ + (tilegx_pipeline)(p2) \ + }, \ + BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \ + } + +#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS + +#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1) + +#define PI(encoding) \ + push_inst(compiler, encoding) + +#define PB3(opcode, dst, srca, srcb) \ + push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__) + +#define PB2(opcode, dst, src) \ + push_2_buffer(compiler, opcode, dst, src, __LINE__) + +#define JR(reg) \ + push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__) + +#define ADD(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__) + +#define SUB(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__) + +#define NOR(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__) + +#define OR(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__) + +#define XOR(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__) + +#define AND(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__) + +#define CLZ(dst, src) \ + push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__) + +#define SHLI(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__) + +#define SHRUI(dst, srca, imm) \ + push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__) + +#define XORI(dst, srca, imm) \ + push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__) + +#define ORI(dst, srca, imm) \ + push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__) + +#define CMPLTU(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__) + +#define CMPLTS(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__) + +#define CMPLTUI(dst, srca, imm) \ + push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__) + +#define CMOVNEZ(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__) + +#define CMOVEQZ(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__) + +#define ADDLI(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__) + +#define SHL16INSLI(dst, srca, srcb) \ + push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__) + +#define LD_ADD(dst, addr, adjust) \ + push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__) + +#define ST_ADD(src, addr, adjust) \ + push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__) + +#define LD(dst, addr) \ + push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__) + +#define BFEXTU(dst, src, start, end) \ + push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__) + +#define BFEXTS(dst, src, start, end) \ + push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__) + +#define ADD_SOLO(dest, srca, srcb) \ + push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb)) + +#define ADDI_SOLO(dest, srca, imm) \ + push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm)) + +#define ADDLI_SOLO(dest, srca, imm) \ + push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) + +#define SHL16INSLI_SOLO(dest, srca, imm) \ + push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm)) + +#define JALR_SOLO(reg) \ + push_inst(compiler, JALR_X1 | SRCA_X1(reg)) + +#define JR_SOLO(reg) \ + push_inst(compiler, JR_X1 | SRCA_X1(reg)) + +struct Format { + /* Mapping of bundle issue slot to assigned pipe. */ + tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; + + /* Mask of pipes used by this bundle. */ + unsigned int pipe_mask; +}; + +const struct Format formats[] = +{ + /* In Y format we must always have something in Y2, since it has + * no fnop, so this conveys that Y2 must always be used. */ + BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE), + + /* Y format has three instructions. */ + BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1), + BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0), + + /* X format has only two instructions. */ + BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE), + BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE) +}; + + +struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE]; +unsigned long inst_buf_index; + +tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode) +{ + /* FIXME: tile: we could pregenerate this. */ + int pipe; + for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++) + ; + return (tilegx_pipeline)(pipe); +} + +void insert_nop(tilegx_mnemonic opc, int line) +{ + const struct tilegx_opcode* opcode = NULL; + + memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]); + + opcode = &tilegx_opcodes[opc]; + inst_buf[0].opcode = opcode; + inst_buf[0].pipe = get_any_valid_pipe(opcode); + inst_buf[0].input_registers = 0; + inst_buf[0].output_registers = 0; + inst_buf[0].line = line; + ++inst_buf_index; +} + +const struct Format* compute_format() +{ + unsigned int compatible_pipes = BUNDLE_FORMAT_MASK( + inst_buf[0].opcode->pipes, + inst_buf[1].opcode->pipes, + (inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE))); + + const struct Format* match = NULL; + const struct Format *b = NULL; + unsigned int i = 0; + for (i; i < sizeof formats / sizeof formats[0]; i++) { + b = &formats[i]; + if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) { + match = b; + break; + } + } + + return match; +} + +sljit_si assign_pipes() +{ + unsigned long output_registers = 0; + unsigned int i = 0; + + if (inst_buf_index == 1) { + tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle + ? TILEGX_OPC_FNOP : TILEGX_OPC_NOP; + insert_nop(opc, __LINE__); + } + + const struct Format* match = compute_format(); + + if (match == NULL) + return -1; + + for (i = 0; i < inst_buf_index; i++) { + + if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0)) + return -1; + + if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0)) + return -1; + + /* Don't include Rzero in the match set, to avoid triggering + needlessly on 'prefetch' instrs. */ + + output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL; + + inst_buf[i].pipe = match->pipe[i]; + } + + /* If only 2 instrs, and in Y-mode, insert a nop. */ + if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) { + insert_nop(TILEGX_OPC_FNOP, __LINE__); + + /* Select the yet unassigned pipe. */ + tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0 + + TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2) + - (inst_buf[1].pipe + inst_buf[2].pipe))); + + inst_buf[0].pipe = pipe; + } + + return 0; +} + +tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst) +{ + int i, val; + const struct tilegx_opcode* opcode = inst->opcode; + tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe]; + + const struct tilegx_operand* operand = NULL; + for (i = 0; i < opcode->num_operands; i++) { + operand = &tilegx_operands[opcode->operands[inst->pipe][i]]; + val = inst->operand_value[i]; + + bits |= operand->insert(val); + } + + return bits; +} + +static sljit_si update_buffer(struct sljit_compiler *compiler) +{ + int count; + int i; + int orig_index = inst_buf_index; + struct jit_instr inst0 = inst_buf[0]; + struct jit_instr inst1 = inst_buf[1]; + struct jit_instr inst2 = inst_buf[2]; + tilegx_bundle_bits bits = 0; + + /* If the bundle is valid as is, perform the encoding and return 1. */ + if (assign_pipes() == 0) { + for (i = 0; i < inst_buf_index; i++) { + bits |= get_bundle_bit(inst_buf + i); +#ifdef TILEGX_JIT_DEBUG + printf("|%04d", inst_buf[i].line); +#endif + } +#ifdef TILEGX_JIT_DEBUG + if (inst_buf_index == 3) + printf("|M0|:\t"); + else + printf("|M0|:\t\t"); + print_insn_tilegx(&bits); +#endif + + inst_buf_index = 0; + +#ifdef TILEGX_JIT_DEBUG + return push_inst_nodebug(compiler, bits); +#else + return push_inst(compiler, bits); +#endif + } + + /* If the bundle is invalid, split it in two. First encode the first two + (or possibly 1) instructions, and then the last, separately. Note that + assign_pipes may have re-ordered the instrs (by inserting no-ops in + lower slots) so we need to reset them. */ + + inst_buf_index = orig_index - 1; + inst_buf[0] = inst0; + inst_buf[1] = inst1; + inst_buf[2] = inst2; + if (assign_pipes() == 0) { + for (i = 0; i < inst_buf_index; i++) { + bits |= get_bundle_bit(inst_buf + i); +#ifdef TILEGX_JIT_DEBUG + printf("|%04d", inst_buf[i].line); +#endif + } + +#ifdef TILEGX_JIT_DEBUG + if (inst_buf_index == 3) + printf("|M1|:\t"); + else + printf("|M1|:\t\t"); + print_insn_tilegx(&bits); +#endif + + if ((orig_index - 1) == 2) { + inst_buf[0] = inst2; + inst_buf_index = 1; + } else if ((orig_index - 1) == 1) { + inst_buf[0] = inst1; + inst_buf_index = 1; + } else + SLJIT_ASSERT_STOP(); + +#ifdef TILEGX_JIT_DEBUG + return push_inst_nodebug(compiler, bits); +#else + return push_inst(compiler, bits); +#endif + } else { + /* We had 3 instrs of which the first 2 can't live in the same bundle. + Split those two. Note that we don't try to then combine the second + and third instr into a single bundle. First instruction: */ + inst_buf_index = 1; + inst_buf[0] = inst0; + inst_buf[1] = inst1; + inst_buf[2] = inst2; + if (assign_pipes() == 0) { + for (i = 0; i < inst_buf_index; i++) { + bits |= get_bundle_bit(inst_buf + i); +#ifdef TILEGX_JIT_DEBUG + printf("|%04d", inst_buf[i].line); +#endif + } + +#ifdef TILEGX_JIT_DEBUG + if (inst_buf_index == 3) + printf("|M2|:\t"); + else + printf("|M2|:\t\t"); + print_insn_tilegx(&bits); +#endif + + inst_buf[0] = inst1; + inst_buf[1] = inst2; + inst_buf_index = orig_index - 1; +#ifdef TILEGX_JIT_DEBUG + return push_inst_nodebug(compiler, bits); +#else + return push_inst(compiler, bits); +#endif + } else + SLJIT_ASSERT_STOP(); + } + + SLJIT_ASSERT_STOP(); +} + +static sljit_si flush_buffer(struct sljit_compiler *compiler) +{ + while (inst_buf_index != 0) + update_buffer(compiler); +} + +static sljit_si push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line) +{ + if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) + FAIL_IF(update_buffer(compiler)); + + const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; + inst_buf[inst_buf_index].opcode = opcode; + inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); + inst_buf[inst_buf_index].operand_value[0] = op0; + inst_buf[inst_buf_index].operand_value[1] = op1; + inst_buf[inst_buf_index].operand_value[2] = op2; + inst_buf[inst_buf_index].operand_value[3] = op3; + inst_buf[inst_buf_index].input_registers = 1L << op1; + inst_buf[inst_buf_index].output_registers = 1L << op0; + inst_buf[inst_buf_index].line = line; + inst_buf_index++; + + return SLJIT_SUCCESS; +} + +static sljit_si push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line) +{ + if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) + FAIL_IF(update_buffer(compiler)); + + const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; + inst_buf[inst_buf_index].opcode = opcode; + inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); + inst_buf[inst_buf_index].operand_value[0] = op0; + inst_buf[inst_buf_index].operand_value[1] = op1; + inst_buf[inst_buf_index].operand_value[2] = op2; + inst_buf[inst_buf_index].line = line; + + switch (opc) { + case TILEGX_OPC_ST_ADD: + inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); + inst_buf[inst_buf_index].output_registers = 1L << op0; + break; + case TILEGX_OPC_LD_ADD: + inst_buf[inst_buf_index].input_registers = 1L << op1; + inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1); + break; + case TILEGX_OPC_ADD: + case TILEGX_OPC_AND: + case TILEGX_OPC_SUB: + case TILEGX_OPC_OR: + case TILEGX_OPC_XOR: + case TILEGX_OPC_NOR: + case TILEGX_OPC_SHL: + case TILEGX_OPC_SHRU: + case TILEGX_OPC_SHRS: + case TILEGX_OPC_CMPLTU: + case TILEGX_OPC_CMPLTS: + case TILEGX_OPC_CMOVEQZ: + case TILEGX_OPC_CMOVNEZ: + inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2); + inst_buf[inst_buf_index].output_registers = 1L << op0; + break; + case TILEGX_OPC_ADDLI: + case TILEGX_OPC_XORI: + case TILEGX_OPC_ORI: + case TILEGX_OPC_SHLI: + case TILEGX_OPC_SHRUI: + case TILEGX_OPC_SHRSI: + case TILEGX_OPC_SHL16INSLI: + case TILEGX_OPC_CMPLTUI: + case TILEGX_OPC_CMPLTSI: + inst_buf[inst_buf_index].input_registers = 1L << op1; + inst_buf[inst_buf_index].output_registers = 1L << op0; + break; + default: + printf("unrecoginzed opc: %s\n", opcode->name); + SLJIT_ASSERT_STOP(); + } + + inst_buf_index++; + + return SLJIT_SUCCESS; +} + +static sljit_si push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line) +{ + if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) + FAIL_IF(update_buffer(compiler)); + + const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; + inst_buf[inst_buf_index].opcode = opcode; + inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); + inst_buf[inst_buf_index].operand_value[0] = op0; + inst_buf[inst_buf_index].operand_value[1] = op1; + inst_buf[inst_buf_index].line = line; + + switch (opc) { + case TILEGX_OPC_BEQZ: + case TILEGX_OPC_BNEZ: + inst_buf[inst_buf_index].input_registers = 1L << op0; + break; + case TILEGX_OPC_ST: + case TILEGX_OPC_ST1: + case TILEGX_OPC_ST2: + case TILEGX_OPC_ST4: + inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1); + inst_buf[inst_buf_index].output_registers = 0; + break; + case TILEGX_OPC_CLZ: + case TILEGX_OPC_LD: + case TILEGX_OPC_LD1U: + case TILEGX_OPC_LD1S: + case TILEGX_OPC_LD2U: + case TILEGX_OPC_LD2S: + case TILEGX_OPC_LD4U: + case TILEGX_OPC_LD4S: + inst_buf[inst_buf_index].input_registers = 1L << op1; + inst_buf[inst_buf_index].output_registers = 1L << op0; + break; + default: + printf("unrecoginzed opc: %s\n", opcode->name); + SLJIT_ASSERT_STOP(); + } + + inst_buf_index++; + + return SLJIT_SUCCESS; +} + +static sljit_si push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line) +{ + if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) + FAIL_IF(update_buffer(compiler)); + + const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; + inst_buf[inst_buf_index].opcode = opcode; + inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); + inst_buf[inst_buf_index].input_registers = 0; + inst_buf[inst_buf_index].output_registers = 0; + inst_buf[inst_buf_index].line = line; + inst_buf_index++; + + return SLJIT_SUCCESS; +} + +static sljit_si push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line) +{ + if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE) + FAIL_IF(update_buffer(compiler)); + + const struct tilegx_opcode* opcode = &tilegx_opcodes[opc]; + inst_buf[inst_buf_index].opcode = opcode; + inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode); + inst_buf[inst_buf_index].operand_value[0] = op0; + inst_buf[inst_buf_index].input_registers = 1L << op0; + inst_buf[inst_buf_index].output_registers = 0; + inst_buf[inst_buf_index].line = line; + inst_buf_index++; + + return flush_buffer(compiler); +} + +static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +{ + sljit_sw diff; + sljit_uw target_addr; + sljit_ins *inst; + sljit_ins saved_inst; + + if (jump->flags & SLJIT_REWRITABLE_JUMP) + return code_ptr; + + if (jump->flags & JUMP_ADDR) + target_addr = jump->u.target; + else { + SLJIT_ASSERT(jump->flags & JUMP_LABEL); + target_addr = (sljit_uw)(code + jump->u.label->size); + } + + inst = (sljit_ins *)jump->addr; + if (jump->flags & IS_COND) + inst--; + + diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3; + if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) { + jump->flags |= PATCH_B; + + if (!(jump->flags & IS_COND)) { + if (jump->flags & IS_JAL) { + jump->flags &= ~(PATCH_B); + jump->flags |= PATCH_J; + inst[0] = JAL_X1; + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(inst); +#endif + } else { + inst[0] = BEQZ_X1 | SRCA_X1(ZERO); + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(inst); +#endif + } + + return inst; + } + + inst[0] = inst[0] ^ (0x7L << 55); + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(inst); +#endif + jump->addr -= sizeof(sljit_ins); + return inst; + } + + if (jump->flags & IS_COND) { + if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { + jump->flags |= PATCH_J; + inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2); + inst[1] = J_X1; + return inst + 1; + } + + return code_ptr; + } + + if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) { + jump->flags |= PATCH_J; + + if (jump->flags & IS_JAL) { + inst[0] = JAL_X1; + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(inst); +#endif + + } else { + inst[0] = J_X1; + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(inst); +#endif + } + + return inst; + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ins *code; + sljit_ins *code_ptr; + sljit_ins *buf_ptr; + sljit_ins *buf_end; + sljit_uw word_count; + sljit_uw addr; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + check_sljit_generate_code(compiler); + reverse_buf(compiler); + + code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins)); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + word_count = 0; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = (sljit_ins *)buf->memory; + buf_end = buf_ptr + (buf->used_size >> 3); + do { + *code_ptr = *buf_ptr++; + SLJIT_ASSERT(!label || label->size >= word_count); + SLJIT_ASSERT(!jump || jump->addr >= word_count); + SLJIT_ASSERT(!const_ || const_->addr >= word_count); + /* These structures are ordered by their address. */ + if (label && label->size == word_count) { + /* Just recording the address. */ + label->addr = (sljit_uw) code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + if (jump && jump->addr == word_count) { + if (jump->flags & IS_JAL) + jump->addr = (sljit_uw)(code_ptr - 4); + else + jump->addr = (sljit_uw)(code_ptr - 3); + + code_ptr = detect_jump_type(jump, code_ptr, code); + jump = jump->next; + } + + if (const_ && const_->addr == word_count) { + /* Just recording the address. */ + const_->addr = (sljit_uw) code_ptr; + const_ = const_->next; + } + + code_ptr++; + word_count++; + } while (buf_ptr < buf_end); + + buf = buf->next; + } while (buf); + + if (label && label->size == word_count) { + label->addr = (sljit_uw) code_ptr; + label->size = code_ptr - code; + label = label->next; + } + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size); + + jump = compiler->jumps; + while (jump) { + do { + addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; + buf_ptr = (sljit_ins *)jump->addr; + + if (jump->flags & PATCH_B) { + addr = (sljit_sw)(addr - (jump->addr)) >> 3; + SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN); + buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr); + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(buf_ptr); +#endif + break; + } + + if (jump->flags & PATCH_J) { + SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)); + addr = (sljit_sw)(addr - (jump->addr)) >> 3; + buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr); + +#ifdef TILEGX_JIT_DEBUG + printf("[runtime relocate]%04d:\t", __LINE__); + print_insn_tilegx(buf_ptr); +#endif + break; + } + + SLJIT_ASSERT(!(jump->flags & IS_JAL)); + + /* Set the fields of immediate loads. */ + buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43); + buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43); + buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43); + } while (0); + + jump = jump->next; + } + + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + SLJIT_CACHE_FLUSH(code, code_ptr); + return code; +} + +static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm) +{ + + if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN) + return ADDLI(dst_ar, ZERO, imm); + + if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) { + FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16)); + return SHL16INSLI(dst_ar, dst_ar, imm); + } + + if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) { + FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); + FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); + return SHL16INSLI(dst_ar, dst_ar, imm); + } + + FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48)); + FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32)); + FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); + return SHL16INSLI(dst_ar, dst_ar, imm); +} + +static sljit_si emit_const(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) +{ + /* Should *not* be optimized as load_immediate, as pcre relocation + mechanism will match this fixed 4-instruction pattern. */ + if (flush) { + FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32)); + FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16)); + return SHL16INSLI_SOLO(dst_ar, dst_ar, imm); + } + + FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32)); + FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16)); + return SHL16INSLI(dst_ar, dst_ar, imm); +} + +static sljit_si emit_const_64(struct sljit_compiler *compiler, sljit_si dst_ar, sljit_sw imm, int flush) +{ + /* Should *not* be optimized as load_immediate, as pcre relocation + mechanism will match this fixed 4-instruction pattern. */ + if (flush) { + FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48)); + FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); + FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); + return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm); + } + + FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48)); + FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32)); + FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16)); + return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_ins base; + sljit_ins bundle = 0; + + CHECK_ERROR(); + check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += (saveds + 1) * sizeof(sljit_sw); + local_size = (local_size + 7) & ~7; + compiler->local_size = local_size; + + if (local_size <= SIMM_16BIT_MAX) { + /* Frequent case. */ + FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size)); + base = SLJIT_LOCALS_REG_mapped; + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); + FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO)); + FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); + base = TMP_REG2_mapped; + local_size = 0; + } + + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); + FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8)); + + if (saveds >= 1) + FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG1_mapped, -8)); + + if (saveds >= 2) + FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG2_mapped, -8)); + + if (saveds >= 3) + FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_REG3_mapped, -8)); + + if (saveds >= 4) + FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG1_mapped, -8)); + + if (saveds >= 5) + FAIL_IF(ST_ADD(ADDR_TMP_mapped, SLJIT_SAVED_EREG2_mapped, -8)); + + if (args >= 1) + FAIL_IF(ADD(SLJIT_SAVED_REG1_mapped, 0, ZERO)); + + if (args >= 2) + FAIL_IF(ADD(SLJIT_SAVED_REG2_mapped, 1, ZERO)); + + if (args >= 3) + FAIL_IF(ADD(SLJIT_SAVED_REG3_mapped, 2, ZERO)); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR_VOID(); + check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + local_size += (saveds + 1) * sizeof(sljit_sw); + compiler->local_size = (local_size + 7) & ~7; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si local_size; + sljit_ins base; + int addr_initialized = 0; + + CHECK_ERROR(); + check_sljit_emit_return(compiler, op, src, srcw); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + local_size = compiler->local_size; + if (local_size <= SIMM_16BIT_MAX) + base = SLJIT_LOCALS_REG_mapped; + else { + FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size)); + FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped)); + base = TMP_REG1_mapped; + local_size = 0; + } + + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8)); + FAIL_IF(LD(RA, ADDR_TMP_mapped)); + + if (compiler->saveds >= 5) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 48)); + addr_initialized = 1; + + FAIL_IF(LD_ADD(SLJIT_SAVED_EREG2_mapped, ADDR_TMP_mapped, 8)); + } + + if (compiler->saveds >= 4) { + if (addr_initialized == 0) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 40)); + addr_initialized = 1; + } + + FAIL_IF(LD_ADD(SLJIT_SAVED_EREG1_mapped, ADDR_TMP_mapped, 8)); + } + + if (compiler->saveds >= 3) { + if (addr_initialized == 0) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 32)); + addr_initialized = 1; + } + + FAIL_IF(LD_ADD(SLJIT_SAVED_REG3_mapped, ADDR_TMP_mapped, 8)); + } + + if (compiler->saveds >= 2) { + if (addr_initialized == 0) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 24)); + addr_initialized = 1; + } + + FAIL_IF(LD_ADD(SLJIT_SAVED_REG2_mapped, ADDR_TMP_mapped, 8)); + } + + if (compiler->saveds >= 1) { + if (addr_initialized == 0) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 16)); + /* addr_initialized = 1; no need to initialize as it's the last one. */ + } + + FAIL_IF(LD_ADD(SLJIT_SAVED_REG1_mapped, ADDR_TMP_mapped, 8)); + } + + if (compiler->local_size <= SIMM_16BIT_MAX) + FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size)); + else + FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO)); + + return JR(RA); +} + +/* reg_ar is an absoulute register! */ + +/* Can perform an operation using at most 1 instruction. */ +static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +{ + SLJIT_ASSERT(arg & SLJIT_MEM); + + if ((!(flags & WRITE_BACK) || !(arg & REG_MASK)) + && !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { + /* Works for both absoulte and relative addresses. */ + if (SLJIT_UNLIKELY(flags & ARG_TEST)) + return 1; + + FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw)); + + if (flags & LOAD_DATA) + FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); + else + FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); + + return -1; + } + + return 0; +} + +/* See getput_arg below. + Note: can_cache is called only for binary operators. Those + operators always uses word arguments without write back. */ +static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM)); + + /* Simple operation except for updates. */ + if (arg & OFFS_REG_MASK) { + argw &= 0x3; + next_argw &= 0x3; + if (argw && argw == next_argw + && (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK))) + return 1; + return 0; + } + + if (arg == next_arg) { + if (((next_argw - argw) <= SIMM_16BIT_MAX + && (next_argw - argw) >= SIMM_16BIT_MIN)) + return 1; + + return 0; + } + + return 0; +} + +/* Emit the necessary instructions. See can_cache above. */ +static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw) +{ + sljit_si tmp_ar, base; + + SLJIT_ASSERT(arg & SLJIT_MEM); + if (!(next_arg & SLJIT_MEM)) { + next_arg = 0; + next_argw = 0; + } + + if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) + tmp_ar = reg_ar; + else + tmp_ar = TMP_REG1_mapped; + + base = arg & REG_MASK; + + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + argw &= 0x3; + + if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) { + SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar); + FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); + reg_ar = TMP_REG1_mapped; + } + + /* Using the cache. */ + if (argw == compiler->cache_argw) { + if (!(flags & WRITE_BACK)) { + if (arg == compiler->cache_arg) { + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); + else + return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); + } + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped)); + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); + else + return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); + } + + FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped)); + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); + else + return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); + } + } else { + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); + else + return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); + } + } + } + + if (SLJIT_UNLIKELY(argw)) { + compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK); + compiler->cache_argw = argw; + FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw)); + } + + if (!(flags & WRITE_BACK)) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); + tmp_ar = TMP_REG3_mapped; + } else + FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); + + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); + else + return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); + } + + FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3])); + + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); + else + return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); + } + + if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { + /* Update only applies if a base register exists. */ + if (reg_ar == reg_map[base]) { + SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar); + if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { + FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw)); + if (flags & LOAD_DATA) + FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped)); + else + FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar)); + + if (argw) + return ADDLI(reg_map[base], reg_map[base], argw); + + return SLJIT_SUCCESS; + } + + FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO)); + reg_ar = TMP_REG1_mapped; + } + + if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) { + if (argw) + FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw)); + } else { + if (compiler->cache_arg == SLJIT_MEM + && argw - compiler->cache_argw <= SIMM_16BIT_MAX + && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); + compiler->cache_argw = argw; + } + + FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); + } else { + compiler->cache_arg = SLJIT_MEM; + compiler->cache_argw = argw; + FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); + FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped)); + } + } + + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]); + else + return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar); + } + + if (compiler->cache_arg == arg + && argw - compiler->cache_argw <= SIMM_16BIT_MAX + && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { + if (argw != compiler->cache_argw) { + FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); + compiler->cache_argw = argw; + } + + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); + else + return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); + } + + if (compiler->cache_arg == SLJIT_MEM + && argw - compiler->cache_argw <= SIMM_16BIT_MAX + && argw - compiler->cache_argw >= SIMM_16BIT_MIN) { + if (argw != compiler->cache_argw) + FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw)); + } else { + compiler->cache_arg = SLJIT_MEM; + FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw)); + } + + compiler->cache_argw = argw; + + if (!base) { + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); + else + return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); + } + + if (arg == next_arg + && next_argw - argw <= SIMM_16BIT_MAX + && next_argw - argw >= SIMM_16BIT_MIN) { + compiler->cache_arg = arg; + FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base])); + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped); + else + return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar); + } + + FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base])); + + if (flags & LOAD_DATA) + return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar); + else + return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar); +} + +static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg_ar, sljit_si arg, sljit_sw argw) +{ + if (getput_arg_fast(compiler, flags, reg_ar, arg, argw)) + return compiler->error; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0); +} + +static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w) +{ + if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) + return compiler->error; + return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_enter(compiler, dst, dstw); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + if (FAST_IS_REG(dst)) + return ADD(reg_map[dst], RA, ZERO); + + /* Memory. */ + return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + check_sljit_emit_fast_return(compiler, src, srcw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) + FAIL_IF(ADD(RA, reg_map[src], ZERO)); + + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw)); + + else if (src & SLJIT_IMM) + FAIL_IF(load_immediate(compiler, RA, srcw)); + + return JR(RA); +} + +static SLJIT_INLINE sljit_si emit_single_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_si src1, sljit_sw src2) +{ + sljit_si overflow_ra = 0; + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (dst != src2) + return ADD(reg_map[dst], reg_map[src2], ZERO); + return SLJIT_SUCCESS; + + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SI) + return BFEXTS(reg_map[dst], reg_map[src2], 0, 31); + + return BFEXTU(reg_map[dst], reg_map[src2], 0, 31); + } else if (dst != src2) + SLJIT_ASSERT_STOP(); + + return SLJIT_SUCCESS; + + case SLJIT_MOV_UB: + case SLJIT_MOV_SB: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SB) + return BFEXTS(reg_map[dst], reg_map[src2], 0, 7); + + return BFEXTU(reg_map[dst], reg_map[src2], 0, 7); + } else if (dst != src2) + SLJIT_ASSERT_STOP(); + + return SLJIT_SUCCESS; + + case SLJIT_MOV_UH: + case SLJIT_MOV_SH: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) { + if (op == SLJIT_MOV_SH) + return BFEXTS(reg_map[dst], reg_map[src2], 0, 15); + + return BFEXTU(reg_map[dst], reg_map[src2], 0, 15); + } else if (dst != src2) + SLJIT_ASSERT_STOP(); + + return SLJIT_SUCCESS; + + case SLJIT_NOT: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2])); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2])); + + return SLJIT_SUCCESS; + + case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + if (op & SLJIT_SET_E) + FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2])); + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(CLZ(reg_map[dst], reg_map[src2])); + + return SLJIT_SUCCESS; + + case SLJIT_ADD: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63)); + if (src2 < 0) + FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); + } + + if (op & SLJIT_SET_E) + FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2)); + + if (op & SLJIT_SET_C) { + if (src2 >= 0) + FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2)); + else { + FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2)); + FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG)); + } + } + + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); + + if (op & SLJIT_SET_O) { + FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63)); + + if (src2 < 0) + FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1)); + } + } else { + if (op & SLJIT_SET_O) { + FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); + FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); + + if (src1 != dst) + overflow_ra = reg_map[src1]; + else if (src2 != dst) + overflow_ra = reg_map[src2]; + else { + /* Rare ocasion. */ + FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); + overflow_ra = TMP_EREG2; + } + } + + if (op & SLJIT_SET_E) + FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2])); + + if (op & SLJIT_SET_C) + FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2])); + + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2])); + + if (op & SLJIT_SET_O) { + FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra)); + FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); + } + } + + /* a + b >= a | b (otherwise, the carry should be set to 1). */ + if (op & SLJIT_SET_C) + FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG)); + + if (op & SLJIT_SET_O) + return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); + + return SLJIT_SUCCESS; + + case SLJIT_ADDC: + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) { + if (src2 >= 0) + FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2)); + else { + FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2)); + FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1)); + } + } + + FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2)); + + } else { + if (op & SLJIT_SET_C) + FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2])); + + /* dst may be the same as src1 or src2. */ + FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2])); + } + + if (op & SLJIT_SET_C) + FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1)); + + FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG)); + + if (!(op & SLJIT_SET_C)) + return SLJIT_SUCCESS; + + /* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */ + FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1)); + FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG)); + /* Set carry flag. */ + return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1); + + case SLJIT_SUB: + if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) { + FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_O) { + FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63)); + + if (src2 < 0) + FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1)); + + if (src1 != dst) + overflow_ra = reg_map[src1]; + else { + /* Rare ocasion. */ + FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); + + overflow_ra = TMP_EREG2; + } + } + + if (op & SLJIT_SET_E) + FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2)); + + if (op & SLJIT_SET_C) { + FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); + FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped)); + } + + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E)) + FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); + + } else { + + if (op & SLJIT_SET_O) { + FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2])); + FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63)); + + if (src1 != dst) + overflow_ra = reg_map[src1]; + else { + /* Rare ocasion. */ + FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO)); + overflow_ra = TMP_EREG2; + } + } + + if (op & SLJIT_SET_E) + FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2])); + + if (op & (SLJIT_SET_U | SLJIT_SET_C)) + FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2])); + + if (op & SLJIT_SET_U) + FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1])); + + if (op & SLJIT_SET_S) { + FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2])); + FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1])); + } + + /* dst may be the same as src1 or src2. */ + if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) + FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); + } + + if (op & SLJIT_SET_O) { + FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra)); + FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63)); + return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO); + } + + return SLJIT_SUCCESS; + + case SLJIT_SUBC: + if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) { + FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2)); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_C) { + FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2)); + FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped)); + } + + /* dst may be the same as src1 or src2. */ + FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2)); + + } else { + if (op & SLJIT_SET_C) + FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2])); + /* dst may be the same as src1 or src2. */ + FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2])); + } + + if (op & SLJIT_SET_C) + FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG)); + + FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG)); + + if (op & SLJIT_SET_C) + FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO)); + + return SLJIT_SUCCESS; + +#define EMIT_LOGICAL(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_3_buffer( \ + compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ + ADDR_TMP_mapped, __LINE__)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_3_buffer( \ + compiler, op_norm, reg_map[dst], reg_map[src1], \ + ADDR_TMP_mapped, __LINE__)); \ + } else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_3_buffer( \ + compiler, op_norm, EQUAL_FLAG, reg_map[src1], \ + reg_map[src2], __LINE__)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_3_buffer( \ + compiler, op_norm, reg_map[dst], reg_map[src1], \ + reg_map[src2], __LINE__)); \ + } + + case SLJIT_AND: + EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND); + return SLJIT_SUCCESS; + + case SLJIT_OR: + EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR); + return SLJIT_SUCCESS; + + case SLJIT_XOR: + EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR); + return SLJIT_SUCCESS; + +#define EMIT_SHIFT(op_imm, op_norm) \ + if (flags & SRC2_IMM) { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_3_buffer( \ + compiler, op_imm, EQUAL_FLAG, reg_map[src1], \ + src2 & 0x3F, __LINE__)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_3_buffer( \ + compiler, op_imm, reg_map[dst], reg_map[src1], \ + src2 & 0x3F, __LINE__)); \ + } else { \ + if (op & SLJIT_SET_E) \ + FAIL_IF(push_3_buffer( \ + compiler, op_imm, reg_map[dst], reg_map[src1], \ + src2 & 0x3F, __LINE__)); \ + if (CHECK_FLAGS(SLJIT_SET_E)) \ + FAIL_IF(push_3_buffer( \ + compiler, op_norm, reg_map[dst], reg_map[src1], \ + reg_map[src2], __LINE__)); \ + } + + case SLJIT_SHL: + EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL); + return SLJIT_SUCCESS; + + case SLJIT_LSHR: + EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU); + return SLJIT_SUCCESS; + + case SLJIT_ASHR: + EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS); + return SLJIT_SUCCESS; + } + + SLJIT_ASSERT_STOP(); + return SLJIT_SUCCESS; +} + +static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si flags, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +{ + /* arg1 goes to TMP_REG1 or src reg. + arg2 goes to TMP_REG2, imm or src reg. + TMP_REG3 can be used for caching. + result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + sljit_si dst_r = TMP_REG2; + sljit_si src1_r; + sljit_sw src2_r = 0; + sljit_si sugg_src2_r = TMP_REG2; + + if (!(flags & ALT_KEEP_CACHE)) { + compiler->cache_arg = 0; + compiler->cache_argw = 0; + } + + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM)) + return SLJIT_SUCCESS; + if (GET_FLAGS(op)) + flags |= UNUSED_DEST; + } else if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + sugg_src2_r = dst_r; + } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw)) + flags |= SLOW_DEST; + + if (flags & IMM_OP) { + if ((src2 & SLJIT_IMM) && src2w) { + if ((!(flags & LOGICAL_OP) + && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN)) + || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) { + flags |= SRC2_IMM; + src2_r = src2w; + } + } + + if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) { + if ((!(flags & LOGICAL_OP) + && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN)) + || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) { + flags |= SRC2_IMM; + src2_r = src1w; + + /* And swap arguments. */ + src1 = src2; + src1w = src2w; + src2 = SLJIT_IMM; + /* src2w = src2_r unneeded. */ + } + } + } + + /* Source 1. */ + if (FAST_IS_REG(src1)) { + src1_r = src1; + flags |= REG1_SOURCE; + } else if (src1 & SLJIT_IMM) { + if (src1w) { + FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w)); + src1_r = TMP_REG1; + } else + src1_r = 0; + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC1; + src1_r = TMP_REG1; + } + + /* Source 2. */ + if (FAST_IS_REG(src2)) { + src2_r = src2; + flags |= REG2_SOURCE; + if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) + dst_r = src2_r; + } else if (src2 & SLJIT_IMM) { + if (!(flags & SRC2_IMM)) { + if (src2w) { + FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w)); + src2_r = sugg_src2_r; + } else { + src2_r = 0; + if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_SI) && (dst & SLJIT_MEM)) + dst_r = 0; + } + } + } else { + if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w)) + FAIL_IF(compiler->error); + else + flags |= SLOW_SRC2; + src2_r = sugg_src2_r; + } + + if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { + SLJIT_ASSERT(src2_r == TMP_REG2); + if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); + } else { + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w)); + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw)); + } + } else if (flags & SLOW_SRC1) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw)); + else if (flags & SLOW_SRC2) + FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw)); + + FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + + if (dst & SLJIT_MEM) { + if (!(flags & SLOW_DEST)) { + getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw); + return compiler->error; + } + + return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw, sljit_si type) +{ + sljit_si sugg_dst_ar, dst_ar; + sljit_si flags = GET_ALL_FLAGS(op); + + CHECK_ERROR(); + check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type); + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + op = GET_OPCODE(op); + sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2]; + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw)); + src = TMP_REG1; + srcw = 0; + } + + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_NOT_EQUAL: + FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1)); + dst_ar = sugg_dst_ar; + break; + case SLJIT_C_LESS: + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_LESS: + case SLJIT_C_FLOAT_GREATER_EQUAL: + dst_ar = ULESS_FLAG; + break; + case SLJIT_C_GREATER: + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_GREATER: + case SLJIT_C_FLOAT_LESS_EQUAL: + dst_ar = UGREATER_FLAG; + break; + case SLJIT_C_SIG_LESS: + case SLJIT_C_SIG_GREATER_EQUAL: + dst_ar = LESS_FLAG; + break; + case SLJIT_C_SIG_GREATER: + case SLJIT_C_SIG_LESS_EQUAL: + dst_ar = GREATER_FLAG; + break; + case SLJIT_C_OVERFLOW: + case SLJIT_C_NOT_OVERFLOW: + dst_ar = OVERFLOW_FLAG; + break; + case SLJIT_C_MUL_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1)); + dst_ar = sugg_dst_ar; + type ^= 0x1; /* Flip type bit for the XORI below. */ + break; + case SLJIT_C_FLOAT_EQUAL: + case SLJIT_C_FLOAT_NOT_EQUAL: + dst_ar = EQUAL_FLAG; + break; + + default: + SLJIT_ASSERT_STOP(); + dst_ar = sugg_dst_ar; + break; + } + + if (type & 0x1) { + FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1)); + dst_ar = sugg_dst_ar; + } + + if (op >= SLJIT_ADD) { + if (TMP_REG2_mapped != dst_ar) + FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO)); + return emit_op(compiler, op | flags, CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, dst_ar, dst, dstw); + + if (sugg_dst_ar != dst_ar) + return ADD(sugg_dst_ar, dst_ar, ZERO); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + op = GET_OPCODE(op); + switch (op) { + case SLJIT_NOP: + return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__); + + case SLJIT_BREAKPOINT: + return PI(BPT); + + case SLJIT_UMUL: + case SLJIT_SMUL: + case SLJIT_UDIV: + case SLJIT_SDIV: + SLJIT_ASSERT_STOP(); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) +{ + CHECK_ERROR(); + check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV: + case SLJIT_MOV_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UI: + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_SI: + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOV_UB: + return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); + + case SLJIT_MOV_SB: + return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); + + case SLJIT_MOV_UH: + return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw); + + case SLJIT_MOV_SH: + return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); + + case SLJIT_MOVU: + case SLJIT_MOVU_P: + return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UI: + return emit_op(compiler, SLJIT_MOV_UI, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_SI: + return emit_op(compiler, SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_MOVU_UB: + return emit_op(compiler, SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_ub) srcw : srcw); + + case SLJIT_MOVU_SB: + return emit_op(compiler, SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sb) srcw : srcw); + + case SLJIT_MOVU_UH: + return emit_op(compiler, SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_uh) srcw : srcw); + + case SLJIT_MOVU_SH: + return emit_op(compiler, SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_sh) srcw : srcw); + + case SLJIT_NOT: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + + case SLJIT_NEG: + return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw); + + case SLJIT_CLZ: + return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +{ + CHECK_ERROR(); + check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + case SLJIT_ADDC: + return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SUB: + case SLJIT_SUBC: + return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_MUL: + return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w); + + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + if (src2 & SLJIT_IMM) + src2w &= 0x3f; + if (op & SLJIT_INT_OP) + src2w &= 0x1f; + + return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler) +{ + struct sljit_label *label; + + flush_buffer(compiler); + + CHECK_ERROR_PTR(); + check_sljit_emit_label(compiler); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + sljit_si src_r = TMP_REG2; + struct sljit_jump *jump = NULL; + + flush_buffer(compiler); + + CHECK_ERROR(); + check_sljit_emit_ijump(compiler, type, src, srcw); + ADJUST_LOCAL_OFFSET(src, srcw); + + if (FAST_IS_REG(src)) { + if (reg_map[src] != 0) + src_r = src; + else + FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO)); + } + + if (type >= SLJIT_CALL0) { + SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); + if (src & (SLJIT_IMM | SLJIT_MEM)) { + if (src & SLJIT_IMM) + FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1)); + else { + SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM)); + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + } + + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); + + FAIL_IF(ADDI_SOLO(54, 54, -16)); + + FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG])); + + return ADDI_SOLO(54, 54, 16); + } + + /* Register input. */ + if (type >= SLJIT_CALL1) + FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); + + FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO)); + + FAIL_IF(ADDI_SOLO(54, 54, -16)); + + FAIL_IF(JALR_SOLO(reg_map[src_r])); + + return ADDI_SOLO(54, 54, 16); + } + + if (src & SLJIT_IMM) { + jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF(!jump); + set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0)); + jump->u.target = srcw; + FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); + + if (type >= SLJIT_FAST_CALL) { + FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO)); + jump->addr = compiler->size; + FAIL_IF(JR_SOLO(reg_map[src_r])); + } else { + jump->addr = compiler->size; + FAIL_IF(JR_SOLO(reg_map[src_r])); + } + + return SLJIT_SUCCESS; + + } else if (src & SLJIT_MEM) + FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw)); + + FAIL_IF(JR_SOLO(reg_map[src_r])); + + if (jump) + jump->addr = compiler->size; + + return SLJIT_SUCCESS; +} + +#define BR_Z(src) \ + inst = BEQZ_X1 | SRCA_X1(src); \ + flags = IS_COND; + +#define BR_NZ(src) \ + inst = BNEZ_X1 | SRCA_X1(src); \ + flags = IS_COND; + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + struct sljit_jump *jump; + sljit_ins inst; + sljit_si flags = 0; + + flush_buffer(compiler); + + CHECK_ERROR_PTR(); + check_sljit_emit_jump(compiler, type); + + jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + switch (type) { + case SLJIT_C_EQUAL: + case SLJIT_C_FLOAT_NOT_EQUAL: + BR_NZ(EQUAL_FLAG); + break; + case SLJIT_C_NOT_EQUAL: + case SLJIT_C_FLOAT_EQUAL: + BR_Z(EQUAL_FLAG); + break; + case SLJIT_C_LESS: + case SLJIT_C_FLOAT_LESS: + BR_Z(ULESS_FLAG); + break; + case SLJIT_C_GREATER_EQUAL: + case SLJIT_C_FLOAT_GREATER_EQUAL: + BR_NZ(ULESS_FLAG); + break; + case SLJIT_C_GREATER: + case SLJIT_C_FLOAT_GREATER: + BR_Z(UGREATER_FLAG); + break; + case SLJIT_C_LESS_EQUAL: + case SLJIT_C_FLOAT_LESS_EQUAL: + BR_NZ(UGREATER_FLAG); + break; + case SLJIT_C_SIG_LESS: + BR_Z(LESS_FLAG); + break; + case SLJIT_C_SIG_GREATER_EQUAL: + BR_NZ(LESS_FLAG); + break; + case SLJIT_C_SIG_GREATER: + BR_Z(GREATER_FLAG); + break; + case SLJIT_C_SIG_LESS_EQUAL: + BR_NZ(GREATER_FLAG); + break; + case SLJIT_C_OVERFLOW: + case SLJIT_C_MUL_OVERFLOW: + BR_Z(OVERFLOW_FLAG); + break; + case SLJIT_C_NOT_OVERFLOW: + case SLJIT_C_MUL_NOT_OVERFLOW: + BR_NZ(OVERFLOW_FLAG); + break; + default: + /* Not conditional branch. */ + inst = 0; + break; + } + + jump->flags |= flags; + + if (inst) { + inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6)); + PTR_FAIL_IF(PI(inst)); + } + + PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1)); + if (type <= SLJIT_JUMP) { + jump->addr = compiler->size; + PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped)); + } else { + SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2); + /* Cannot be optimized out if type is >= CALL0. */ + jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0); + PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO)); + jump->addr = compiler->size; + PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped)); + } + + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ + return 0; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src, sljit_sw srcw) +{ + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, sljit_si dst, sljit_sw dstw, sljit_si src1, sljit_sw src1w, sljit_si src2, sljit_sw src2w) +{ + SLJIT_ASSERT_STOP(); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + struct sljit_const *const_; + sljit_si reg; + + flush_buffer(compiler); + + CHECK_ERROR_PTR(); + check_sljit_emit_const(compiler, dst, dstw, init_value); + ADJUST_LOCAL_OFFSET(dst, dstw); + + const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + + reg = FAST_IS_REG(dst) ? dst : TMP_REG2; + + PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1)); + + if (dst & SLJIT_MEM) + PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0)); + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ + sljit_ins *inst = (sljit_ins *)addr; + + inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43); + inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43); + inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43); + SLJIT_CACHE_FLUSH(inst, inst + 3); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + sljit_ins *inst = (sljit_ins *)addr; + + inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43); + inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43); + inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43); + inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43); + SLJIT_CACHE_FLUSH(inst, inst + 4); +} diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c new file mode 100644 index 0000000..d7129c8 --- /dev/null +++ b/src/sljit/sljitNativeX86_32.c @@ -0,0 +1,550 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 32-bit arch dependent functions. */ + +static sljit_si emit_do_imm(struct sljit_compiler *compiler, sljit_ub opcode, sljit_sw imm) +{ + sljit_ub *inst; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(1 + sizeof(sljit_sw)); + *inst++ = opcode; + *(sljit_sw*)inst = imm; + return SLJIT_SUCCESS; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) +{ + if (type == SLJIT_JUMP) { + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + *code_ptr++ = CALL_i32; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MW; + else + *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4); + code_ptr += 4; + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si size; + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->args = args; + compiler->flags_saved = 0; + + size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); +#else + size += (args > 0 ? (2 + args * 3) : 0); +#endif + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + PUSH_REG(reg_map[TMP_REG1]); +#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; + } +#endif + if (saveds > 2 || scratches > 7) + PUSH_REG(reg_map[SLJIT_S2]); + if (saveds > 1 || scratches > 8) + PUSH_REG(reg_map[SLJIT_S1]); + if (saveds > 0 || scratches > 9) + PUSH_REG(reg_map[SLJIT_S0]); + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (args > 0) { + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2]; + } + if (args > 1) { + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1]; + } + if (args > 2) { + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */; + *inst++ = 0x24; + *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */ + } +#else + if (args > 0) { + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1]; + *inst++ = sizeof(sljit_sw) * 2; + } + if (args > 1) { + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1]; + *inst++ = sizeof(sljit_sw) * 3; + } + if (args > 2) { + *inst++ = MOV_r_rm; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1]; + *inst++ = sizeof(sljit_sw) * 4; + } +#endif + + SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words); +#if defined(__APPLE__) + /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; +#else + if (options & SLJIT_DOUBLE_ALIGNMENT) { + local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 17); + FAIL_IF(!inst); + + INC_SIZE(17); + inst[0] = MOV_r_rm; + inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP]; + inst[2] = GROUP_F7; + inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP]; + *(sljit_sw*)(inst + 4) = 0x4; + inst[8] = JNE_i8; + inst[9] = 6; + inst[10] = GROUP_BINARY_81; + inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP]; + *(sljit_sw*)(inst + 12) = 0x4; + inst[16] = PUSH_r + reg_map[TMP_REG1]; + } + else + local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); +#endif + + compiler->local_size = local_size; +#ifdef _WIN32 + if (local_size > 1024) { +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); +#else + local_size -= SLJIT_LOCALS_OFFSET; + FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); + FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET)); +#endif + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); + } +#endif + + SLJIT_ASSERT(local_size > 0); + return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->args = args; + +#if defined(__APPLE__) + saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; +#else + if (options & SLJIT_DOUBLE_ALIGNMENT) + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); + else + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si size; + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + SLJIT_ASSERT(compiler->args >= 0); + + compiler->flags_saved = 0; + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + + SLJIT_ASSERT(compiler->local_size > 0); + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); + +#if !defined(__APPLE__) + if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + + INC_SIZE(3); + inst[0] = MOV_r_rm; + inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */; + inst[2] = (4 << 3) | reg_map[SLJIT_SP]; + } +#endif + + size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + + (compiler->saveds <= 3 ? compiler->saveds : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + size += 2; +#else + if (compiler->args > 0) + size += 2; +#endif + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + + if (compiler->saveds > 0 || compiler->scratches > 9) + POP_REG(reg_map[SLJIT_S0]); + if (compiler->saveds > 1 || compiler->scratches > 8) + POP_REG(reg_map[SLJIT_S1]); + if (compiler->saveds > 2 || compiler->scratches > 7) + POP_REG(reg_map[SLJIT_S2]); + POP_REG(reg_map[TMP_REG1]); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->args > 2) + RET_I16(sizeof(sljit_sw)); + else + RET(); +#else + RET(); +#endif + + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +/* Size contains the flags as well. */ +static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, + /* The register or immediate operand. */ + sljit_si a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_si b, sljit_sw immb) +{ + sljit_ub *inst; + sljit_ub *buf_ptr; + sljit_si flags = size & ~0xf; + sljit_si inst_size; + + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if ((b & REG_MASK) == SLJIT_UNUSED) + inst_size += sizeof(sljit_sw); + else if (immb != 0 && !(b & OFFS_REG_MASK)) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_sb); + else + inst_size += sizeof(sljit_sw); + } + + if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK)) + b |= TO_OFFS_REG(SLJIT_SP); + + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) + inst_size += 1; /* SIB byte. */ + } + + /* Calculate size of a. */ + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= 0x1f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_sw); + } + else + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if ((a & SLJIT_IMM) || (a == 0)) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = reg_map[a] << 3; + else + *buf_ptr = a << 3; + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b); + else if ((b & REG_MASK) != SLJIT_UNUSED) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) + *buf_ptr++ |= reg_map[b & REG_MASK]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3); + } + + if (immb != 0) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6); + } + } + else { + *buf_ptr++ |= 0x05; + *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_sw); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + *(short*)buf_ptr = imma; + else if (!(flags & EX86_SHIFT_INS)) + *(sljit_sw*)buf_ptr = imma; + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) +{ + sljit_ub *inst; + +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + inst = (sljit_ub*)ensure_buf(compiler, type >= SLJIT_CALL3 ? 1 + 2 + 1 : 1 + 2); + FAIL_IF(!inst); + INC_SIZE(type >= SLJIT_CALL3 ? 2 + 1 : 2); + + if (type >= SLJIT_CALL3) + PUSH_REG(reg_map[SLJIT_R2]); + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]; +#else + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 * (type - SLJIT_CALL0)); + FAIL_IF(!inst); + INC_SIZE(4 * (type - SLJIT_CALL0)); + + *inst++ = MOV_rm_r; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R0] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; + *inst++ = 0; + if (type >= SLJIT_CALL2) { + *inst++ = MOV_rm_r; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R1] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; + *inst++ = sizeof(sljit_sw); + } + if (type >= SLJIT_CALL3) { + *inst++ = MOV_rm_r; + *inst++ = MOD_DISP8 | (reg_map[SLJIT_R2] << 3) | 0x4 /* SIB */; + *inst++ = (0x4 /* none*/ << 3) | reg_map[SLJIT_SP]; + *inst++ = 2 * sizeof(sljit_sw); + } +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + /* Unused dest is possible here. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1); + POP_REG(reg_map[dst]); + return SLJIT_SUCCESS; + } + + /* Memory. */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (FAST_IS_REG(src)) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_map[src]); + } + else if (src & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + else { + /* SLJIT_IMM. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!inst); + + INC_SIZE(5 + 1); + *inst++ = PUSH_i32; + *(sljit_sw*)inst = srcw; + inst += sizeof(sljit_sw); + } + + RET(); + return SLJIT_SUCCESS; +} diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c new file mode 100644 index 0000000..1790d8a --- /dev/null +++ b/src/sljit/sljitNativeX86_64.c @@ -0,0 +1,747 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* x86 64-bit arch dependent functions. */ + +static sljit_si emit_load_imm64(struct sljit_compiler *compiler, sljit_si reg, sljit_sw imm) +{ + sljit_ub *inst; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); + FAIL_IF(!inst); + INC_SIZE(2 + sizeof(sljit_sw)); + *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); + *(sljit_sw*)inst = imm; + return SLJIT_SUCCESS; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type) +{ + if (type < SLJIT_JUMP) { + /* Invert type. */ + *code_ptr++ = get_jump_code(type ^ 0x1) - 0x10; + *code_ptr++ = 10 + 3; + } + + SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first); + *code_ptr++ = REX_W | REX_B; + *code_ptr++ = MOV_r_i32 + 1; + jump->addr = (sljit_uw)code_ptr; + + if (jump->flags & JUMP_LABEL) + jump->flags |= PATCH_MD; + else + *(sljit_sw*)code_ptr = jump->u.target; + + code_ptr += sizeof(sljit_sw); + *code_ptr++ = REX_B; + *code_ptr++ = GROUP_FF; + *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); + + return code_ptr; +} + +static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type) +{ + sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_si)); + + if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) { + *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32; + *(sljit_sw*)code_ptr = delta; + } + else { + SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); + *code_ptr++ = REX_W | REX_B; + *code_ptr++ = MOV_r_i32 + 1; + *(sljit_sw*)code_ptr = addr; + code_ptr += sizeof(sljit_sw); + *code_ptr++ = REX_B; + *code_ptr++ = GROUP_FF; + *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si i, tmp, size, saved_register_size; + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + compiler->flags_saved = 0; + + /* Including the return address saved by the call instruction. */ + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + + tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; + for (i = SLJIT_S0; i >= tmp; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + PUSH_REG(reg_lmap[i]); + } + + if (args > 0) { + size = args * 3; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + + INC_SIZE(size); + +#ifndef _WIN64 + if (args > 0) { + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */; + } + if (args > 1) { + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */; + } + if (args > 2) { + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */; + } +#else + if (args > 0) { + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */; + } + if (args > 1) { + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */; + } + if (args > 2) { + *inst++ = REX_W | REX_B; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */; + } +#endif + } + + local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + compiler->local_size = local_size; + +#ifdef _WIN64 + if (local_size > 1024) { + /* Allocate stack for the callback, which grows the stack. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_si))); + FAIL_IF(!inst); + INC_SIZE(4 + (3 + sizeof(sljit_si))); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | SUB | 4; + /* Allocated size for registers must be divisible by 8. */ + SLJIT_ASSERT(!(saved_register_size & 0x7)); + /* Aligned to 16 byte. */ + if (saved_register_size & 0x8) { + *inst++ = 5 * sizeof(sljit_sw); + local_size -= 5 * sizeof(sljit_sw); + } else { + *inst++ = 4 * sizeof(sljit_sw); + local_size -= 4 * sizeof(sljit_sw); + } + /* Second instruction */ + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg); + *inst++ = REX_W; + *inst++ = MOV_rm_i32; + *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; + *(sljit_si*)inst = local_size; +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); + } +#endif + + SLJIT_ASSERT(local_size > 0); + if (local_size <= 127) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | SUB | 4; + *inst++ = local_size; + } + else { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | SUB | 4; + *(sljit_si*)inst = local_size; + inst += sizeof(sljit_si); + } + +#ifdef _WIN64 + /* Save xmm6 register: movaps [rsp + 0x20], xmm6 */ + if (fscratches >= 6 || fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247429; + } +#endif + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_set_context(struct sljit_compiler *compiler, + sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds, + sljit_si fscratches, sljit_si fsaveds, sljit_si local_size) +{ + sljit_si saved_register_size; + + CHECK_ERROR(); + CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); + set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); + + /* Including the return address saved by the call instruction. */ + saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); + compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw) +{ + sljit_si i, tmp, size; + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_return(compiler, op, src, srcw)); + + compiler->flags_saved = 0; + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); + +#ifdef _WIN64 + /* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */ + if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = GROUP_0F; + *(sljit_si*)inst = 0x20247428; + } +#endif + + SLJIT_ASSERT(compiler->local_size > 0); + if (compiler->local_size <= 127) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | ADD | 4; + *inst = compiler->local_size; + } + else { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | ADD | 4; + *(sljit_si*)inst = compiler->local_size; + } + + tmp = compiler->scratches; + for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; + for (i = tmp; i <= SLJIT_S0; i++) { + size = reg_map[i] >= 8 ? 2 : 1; + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + if (reg_map[i] >= 8) + *inst++ = REX_B; + POP_REG(reg_lmap[i]); + } + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + RET(); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_si emit_do_imm32(struct sljit_compiler *compiler, sljit_ub rex, sljit_ub opcode, sljit_sw imm) +{ + sljit_ub *inst; + sljit_si length = 1 + (rex ? 1 : 0) + sizeof(sljit_si); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + length); + FAIL_IF(!inst); + INC_SIZE(length); + if (rex) + *inst++ = rex; + *inst++ = opcode; + *(sljit_si*)inst = imm; + return SLJIT_SUCCESS; +} + +static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, sljit_si size, + /* The register or immediate operand. */ + sljit_si a, sljit_sw imma, + /* The general operand (not immediate). */ + sljit_si b, sljit_sw immb) +{ + sljit_ub *inst; + sljit_ub *buf_ptr; + sljit_ub rex = 0; + sljit_si flags = size & ~0xf; + sljit_si inst_size; + + /* The immediate operand must be 32 bit. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + /* Both cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); + /* Size flags not allowed for typed instructions. */ + SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0); + /* Both size flags cannot be switched on. */ + SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); + /* SSE2 and immediate is not possible. */ + SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); + SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) + && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) + && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + + size &= 0xf; + inst_size = size; + + if (!compiler->mode32 && !(flags & EX86_NO_REXW)) + rex |= REX_W; + else if (flags & EX86_REX) + rex |= REX; + + if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) + inst_size++; + if (flags & EX86_PREF_66) + inst_size++; + + /* Calculate size of b. */ + inst_size += 1; /* mod r/m byte. */ + if (b & SLJIT_MEM) { + if (!(b & OFFS_REG_MASK)) { + if (NOT_HALFWORD(immb)) { + if (emit_load_imm64(compiler, TMP_REG3, immb)) + return NULL; + immb = 0; + if (b & REG_MASK) + b |= TO_OFFS_REG(TMP_REG3); + else + b |= TMP_REG3; + } + else if (reg_lmap[b & REG_MASK] == 4) + b |= TO_OFFS_REG(SLJIT_SP); + } + + if ((b & REG_MASK) == SLJIT_UNUSED) + inst_size += 1 + sizeof(sljit_si); /* SIB byte required to avoid RIP based addressing. */ + else { + if (reg_map[b & REG_MASK] >= 8) + rex |= REX_B; + + if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) { + /* Immediate operand. */ + if (immb <= 127 && immb >= -128) + inst_size += sizeof(sljit_sb); + else + inst_size += sizeof(sljit_si); + } + else if (reg_lmap[b & REG_MASK] == 5) + inst_size += sizeof(sljit_sb); + + if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) { + inst_size += 1; /* SIB byte. */ + if (reg_map[OFFS_REG(b)] >= 8) + rex |= REX_X; + } + } + } + else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8) + rex |= REX_B; + + if (a & SLJIT_IMM) { + if (flags & EX86_BIN_INS) { + if (imma <= 127 && imma >= -128) { + inst_size += 1; + flags |= EX86_BYTE_ARG; + } else + inst_size += 4; + } + else if (flags & EX86_SHIFT_INS) { + imma &= compiler->mode32 ? 0x1f : 0x3f; + if (imma != 1) { + inst_size ++; + flags |= EX86_BYTE_ARG; + } + } else if (flags & EX86_BYTE_ARG) + inst_size++; + else if (flags & EX86_HALF_ARG) + inst_size += sizeof(short); + else + inst_size += sizeof(sljit_si); + } + else { + SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); + /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ + if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8) + rex |= REX_R; + } + + if (rex) + inst_size++; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + inst_size); + PTR_FAIL_IF(!inst); + + /* Encoding the byte. */ + INC_SIZE(inst_size); + if (flags & EX86_PREF_F2) + *inst++ = 0xf2; + if (flags & EX86_PREF_F3) + *inst++ = 0xf3; + if (flags & EX86_PREF_66) + *inst++ = 0x66; + if (rex) + *inst++ = rex; + buf_ptr = inst + size; + + /* Encode mod/rm byte. */ + if (!(flags & EX86_SHIFT_INS)) { + if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; + + if ((a & SLJIT_IMM) || (a == 0)) + *buf_ptr = 0; + else if (!(flags & EX86_SSE2_OP1)) + *buf_ptr = reg_lmap[a] << 3; + else + *buf_ptr = a << 3; + } + else { + if (a & SLJIT_IMM) { + if (imma == 1) + *inst = GROUP_SHIFT_1; + else + *inst = GROUP_SHIFT_N; + } else + *inst = GROUP_SHIFT_CL; + *buf_ptr = 0; + } + + if (!(b & SLJIT_MEM)) + *buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b); + else if ((b & REG_MASK) != SLJIT_UNUSED) { + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) { + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr |= 0x40; + else + *buf_ptr |= 0x80; + } + + if ((b & OFFS_REG_MASK) == SLJIT_UNUSED) + *buf_ptr++ |= reg_lmap[b & REG_MASK]; + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3); + } + + if (immb != 0 || reg_lmap[b & REG_MASK] == 5) { + if (immb <= 127 && immb >= -128) + *buf_ptr++ = immb; /* 8 bit displacement. */ + else { + *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_si); + } + } + } + else { + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr |= 0x40; + *buf_ptr++ |= 0x04; + *buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6); + if (reg_lmap[b & REG_MASK] == 5) + *buf_ptr++ = 0; + } + } + else { + *buf_ptr++ |= 0x04; + *buf_ptr++ = 0x25; + *(sljit_si*)buf_ptr = immb; /* 32 bit displacement. */ + buf_ptr += sizeof(sljit_si); + } + + if (a & SLJIT_IMM) { + if (flags & EX86_BYTE_ARG) + *buf_ptr = imma; + else if (flags & EX86_HALF_ARG) + *(short*)buf_ptr = imma; + else if (!(flags & EX86_SHIFT_INS)) + *(sljit_si*)buf_ptr = imma; + } + + return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); +} + +/* --------------------------------------------------------------------- */ +/* Call / return instructions */ +/* --------------------------------------------------------------------- */ + +static SLJIT_INLINE sljit_si call_with_args(struct sljit_compiler *compiler, sljit_si type) +{ + sljit_ub *inst; + +#ifndef _WIN64 + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!inst); + INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); + if (type >= SLJIT_CALL3) { + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; + } + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; +#else + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); + FAIL_IF(!inst); + INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); + if (type >= SLJIT_CALL3) { + *inst++ = REX_W | REX_R; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; + } + *inst++ = REX_W; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0]; +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw) +{ + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + /* For UNUSED dst. Uncommon, but possible. */ + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (FAST_IS_REG(dst)) { + if (reg_map[dst] < 8) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_B; + POP_REG(reg_lmap[dst]); + return SLJIT_SUCCESS; + } + + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = POP_rm; + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw) +{ + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + if (FAST_IS_REG(src)) { + if (reg_map[src] < 8) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 1); + FAIL_IF(!inst); + + INC_SIZE(1 + 1); + PUSH_REG(reg_lmap[src]); + } + else { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 1); + FAIL_IF(!inst); + + INC_SIZE(2 + 1); + *inst++ = REX_B; + PUSH_REG(reg_lmap[src]); + } + } + else if (src & SLJIT_MEM) { + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= PUSH_rm; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + } + else { + SLJIT_ASSERT(IS_HALFWORD(srcw)); + /* SLJIT_IMM. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5 + 1); + FAIL_IF(!inst); + + INC_SIZE(5 + 1); + *inst++ = PUSH_i32; + *(sljit_si*)inst = srcw; + inst += sizeof(sljit_si); + } + + RET(); + return SLJIT_SUCCESS; +} + + +/* --------------------------------------------------------------------- */ +/* Extend input */ +/* --------------------------------------------------------------------- */ + +static sljit_si emit_mov_int(struct sljit_compiler *compiler, sljit_si sign, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + sljit_si dst_r; + + compiler->mode32 = 0; + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { + if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + return emit_load_imm64(compiler, dst, srcw); + } + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_si)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + compiler->mode32 = 0; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + if (sign) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = MOVSXD_r_rm; + } else { + compiler->mode32 = 1; + FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + compiler->mode32 = 0; + } + } + + if (dst & SLJIT_MEM) { + compiler->mode32 = 1; + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + compiler->mode32 = 0; + } + + return SLJIT_SUCCESS; +} diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c new file mode 100644 index 0000000..22a163f --- /dev/null +++ b/src/sljit/sljitNativeX86_common.c @@ -0,0 +1,2925 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void) +{ + return "x86" SLJIT_CPUINFO; +} + +/* + 32b register indexes: + 0 - EAX + 1 - ECX + 2 - EDX + 3 - EBX + 4 - none + 5 - EBP + 6 - ESI + 7 - EDI +*/ + +/* + 64b register indexes: + 0 - RAX + 1 - RCX + 2 - RDX + 3 - RBX + 4 - none + 5 - RBP + 6 - RSI + 7 - RDI + 8 - R8 - From now on REX prefix is required + 9 - R9 + 10 - R10 + 11 - R11 + 12 - R12 + 13 - R13 + 14 - R14 + 15 - R15 +*/ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) + +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { + 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 +}; + +#define CHECK_EXTRA_REGS(p, w, do) \ + if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ + w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ + p = SLJIT_MEM1(SLJIT_SP); \ + do; \ + } + +#else /* SLJIT_CONFIG_X86_32 */ + +/* Last register + 1. */ +#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) +#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) +#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) + +/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present + Note: avoid to use r12 and r13 for memory addessing + therefore r12 is better for SAVED_EREG than SAVED_REG. */ +#ifndef _WIN64 +/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 +}; +/* low-map. reg_map & 0x7. */ +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 +}; +#else +/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ +static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 +}; +/* low-map. reg_map & 0x7. */ +static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 +}; +#endif + +#define REX_W 0x48 +#define REX_R 0x44 +#define REX_X 0x42 +#define REX_B 0x41 +#define REX 0x40 + +#ifndef _WIN64 +#define HALFWORD_MAX 0x7fffffffl +#define HALFWORD_MIN -0x80000000l +#else +#define HALFWORD_MAX 0x7fffffffll +#define HALFWORD_MIN -0x80000000ll +#endif + +#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN) +#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN) + +#define CHECK_EXTRA_REGS(p, w, do) + +#endif /* SLJIT_CONFIG_X86_32 */ + +#define TMP_FREG (0) + +/* Size flags for emit_x86_instruction: */ +#define EX86_BIN_INS 0x0010 +#define EX86_SHIFT_INS 0x0020 +#define EX86_REX 0x0040 +#define EX86_NO_REXW 0x0080 +#define EX86_BYTE_ARG 0x0100 +#define EX86_HALF_ARG 0x0200 +#define EX86_PREF_66 0x0400 +#define EX86_PREF_F2 0x0800 +#define EX86_PREF_F3 0x1000 +#define EX86_SSE2_OP1 0x2000 +#define EX86_SSE2_OP2 0x4000 +#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2) + +/* --------------------------------------------------------------------- */ +/* Instrucion forms */ +/* --------------------------------------------------------------------- */ + +#define ADD (/* BINARY */ 0 << 3) +#define ADD_EAX_i32 0x05 +#define ADD_r_rm 0x03 +#define ADD_rm_r 0x01 +#define ADDSD_x_xm 0x58 +#define ADC (/* BINARY */ 2 << 3) +#define ADC_EAX_i32 0x15 +#define ADC_r_rm 0x13 +#define ADC_rm_r 0x11 +#define AND (/* BINARY */ 4 << 3) +#define AND_EAX_i32 0x25 +#define AND_r_rm 0x23 +#define AND_rm_r 0x21 +#define ANDPD_x_xm 0x54 +#define BSR_r_rm (/* GROUP_0F */ 0xbd) +#define CALL_i32 0xe8 +#define CALL_rm (/* GROUP_FF */ 2 << 3) +#define CDQ 0x99 +#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) +#define CMP (/* BINARY */ 7 << 3) +#define CMP_EAX_i32 0x3d +#define CMP_r_rm 0x3b +#define CMP_rm_r 0x39 +#define CVTPD2PS_x_xm 0x5a +#define CVTSI2SD_x_rm 0x2a +#define CVTTSD2SI_r_xm 0x2c +#define DIV (/* GROUP_F7 */ 6 << 3) +#define DIVSD_x_xm 0x5e +#define INT3 0xcc +#define IDIV (/* GROUP_F7 */ 7 << 3) +#define IMUL (/* GROUP_F7 */ 5 << 3) +#define IMUL_r_rm (/* GROUP_0F */ 0xaf) +#define IMUL_r_rm_i8 0x6b +#define IMUL_r_rm_i32 0x69 +#define JE_i8 0x74 +#define JNE_i8 0x75 +#define JMP_i8 0xeb +#define JMP_i32 0xe9 +#define JMP_rm (/* GROUP_FF */ 4 << 3) +#define LEA_r_m 0x8d +#define MOV_r_rm 0x8b +#define MOV_r_i32 0xb8 +#define MOV_rm_r 0x89 +#define MOV_rm_i32 0xc7 +#define MOV_rm8_i8 0xc6 +#define MOV_rm8_r8 0x88 +#define MOVSD_x_xm 0x10 +#define MOVSD_xm_x 0x11 +#define MOVSXD_r_rm 0x63 +#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe) +#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf) +#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6) +#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7) +#define MUL (/* GROUP_F7 */ 4 << 3) +#define MULSD_x_xm 0x59 +#define NEG_rm (/* GROUP_F7 */ 3 << 3) +#define NOP 0x90 +#define NOT_rm (/* GROUP_F7 */ 2 << 3) +#define OR (/* BINARY */ 1 << 3) +#define OR_r_rm 0x0b +#define OR_EAX_i32 0x0d +#define OR_rm_r 0x09 +#define OR_rm8_r8 0x08 +#define POP_r 0x58 +#define POP_rm 0x8f +#define POPF 0x9d +#define PUSH_i32 0x68 +#define PUSH_r 0x50 +#define PUSH_rm (/* GROUP_FF */ 6 << 3) +#define PUSHF 0x9c +#define RET_near 0xc3 +#define RET_i16 0xc2 +#define SBB (/* BINARY */ 3 << 3) +#define SBB_EAX_i32 0x1d +#define SBB_r_rm 0x1b +#define SBB_rm_r 0x19 +#define SAR (/* SHIFT */ 7 << 3) +#define SHL (/* SHIFT */ 4 << 3) +#define SHR (/* SHIFT */ 5 << 3) +#define SUB (/* BINARY */ 5 << 3) +#define SUB_EAX_i32 0x2d +#define SUB_r_rm 0x2b +#define SUB_rm_r 0x29 +#define SUBSD_x_xm 0x5c +#define TEST_EAX_i32 0xa9 +#define TEST_rm_r 0x85 +#define UCOMISD_x_xm 0x2e +#define UNPCKLPD_x_xm 0x14 +#define XCHG_EAX_r 0x90 +#define XCHG_r_rm 0x87 +#define XOR (/* BINARY */ 6 << 3) +#define XOR_EAX_i32 0x35 +#define XOR_r_rm 0x33 +#define XOR_rm_r 0x31 +#define XORPD_x_xm 0x57 + +#define GROUP_0F 0x0f +#define GROUP_F7 0xf7 +#define GROUP_FF 0xff +#define GROUP_BINARY_81 0x81 +#define GROUP_BINARY_83 0x83 +#define GROUP_SHIFT_1 0xd1 +#define GROUP_SHIFT_N 0xc1 +#define GROUP_SHIFT_CL 0xd3 + +#define MOD_REG 0xc0 +#define MOD_DISP8 0x40 + +#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s)) + +#define PUSH_REG(r) (*inst++ = (PUSH_r + (r))) +#define POP_REG(r) (*inst++ = (POP_r + (r))) +#define RET() (*inst++ = (RET_near)) +#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0) +/* r32, r/m32 */ +#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm)) + +/* Multithreading does not affect these static variables, since they store + built-in CPU features. Therefore they can be overwritten by different threads + if they detect the CPU features in the same time. */ +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) +static sljit_si cpu_has_sse2 = -1; +#endif +static sljit_si cpu_has_cmov = -1; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 +#include +#endif + +static void get_cpu_features(void) +{ + sljit_ui features; + +#if defined(_MSC_VER) && _MSC_VER >= 1400 + + int CPUInfo[4]; + __cpuid(CPUInfo, 1); + features = (sljit_ui)CPUInfo[3]; + +#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) + + /* AT&T syntax. */ + __asm__ ( + "movl $0x1, %%eax\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + /* On x86-32, there is no red zone, so this + should work (no need for a local variable). */ + "push %%ebx\n" +#endif + "cpuid\n" +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + "pop %%ebx\n" +#endif + "movl %%edx, %0\n" + : "=g" (features) + : +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + : "%eax", "%ecx", "%edx" +#else + : "%rax", "%rbx", "%rcx", "%rdx" +#endif + ); + +#else /* _MSC_VER && _MSC_VER >= 1400 */ + + /* Intel syntax. */ + __asm { + mov eax, 1 + cpuid + mov features, edx + } + +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + cpu_has_sse2 = (features >> 26) & 0x1; +#endif + cpu_has_cmov = (features >> 15) & 0x1; +} + +static sljit_ub get_jump_code(sljit_si type) +{ + switch (type) { + case SLJIT_EQUAL: + case SLJIT_D_EQUAL: + return 0x84 /* je */; + + case SLJIT_NOT_EQUAL: + case SLJIT_D_NOT_EQUAL: + return 0x85 /* jne */; + + case SLJIT_LESS: + case SLJIT_D_LESS: + return 0x82 /* jc */; + + case SLJIT_GREATER_EQUAL: + case SLJIT_D_GREATER_EQUAL: + return 0x83 /* jae */; + + case SLJIT_GREATER: + case SLJIT_D_GREATER: + return 0x87 /* jnbe */; + + case SLJIT_LESS_EQUAL: + case SLJIT_D_LESS_EQUAL: + return 0x86 /* jbe */; + + case SLJIT_SIG_LESS: + return 0x8c /* jl */; + + case SLJIT_SIG_GREATER_EQUAL: + return 0x8d /* jnl */; + + case SLJIT_SIG_GREATER: + return 0x8f /* jnle */; + + case SLJIT_SIG_LESS_EQUAL: + return 0x8e /* jle */; + + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + return 0x80 /* jo */; + + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + return 0x81 /* jno */; + + case SLJIT_D_UNORDERED: + return 0x8a /* jp */; + + case SLJIT_D_ORDERED: + return 0x8b /* jpo */; + } + return 0; +} + +static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type); +#endif + +static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type) +{ + sljit_si short_jump; + sljit_uw label_addr; + + if (jump->flags & JUMP_LABEL) + label_addr = (sljit_uw)(code + jump->u.label->size); + else + label_addr = jump->u.target; + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN) + return generate_far_jump_code(jump, code_ptr, type); +#endif + + if (type == SLJIT_JUMP) { + if (short_jump) + *code_ptr++ = JMP_i8; + else + *code_ptr++ = JMP_i32; + jump->addr++; + } + else if (type >= SLJIT_FAST_CALL) { + short_jump = 0; + *code_ptr++ = CALL_i32; + jump->addr++; + } + else if (short_jump) { + *code_ptr++ = get_jump_code(type) - 0x10; + jump->addr++; + } + else { + *code_ptr++ = GROUP_0F; + *code_ptr++ = get_jump_code(type); + jump->addr += 2; + } + + if (short_jump) { + jump->flags |= PATCH_MB; + code_ptr += sizeof(sljit_sb); + } else { + jump->flags |= PATCH_MW; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr += sizeof(sljit_sw); +#else + code_ptr += sizeof(sljit_si); +#endif + } + + return code_ptr; +} + +SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) +{ + struct sljit_memory_fragment *buf; + sljit_ub *code; + sljit_ub *code_ptr; + sljit_ub *buf_ptr; + sljit_ub *buf_end; + sljit_ub len; + + struct sljit_label *label; + struct sljit_jump *jump; + struct sljit_const *const_; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_generate_code(compiler)); + reverse_buf(compiler); + + /* Second code generation pass. */ + code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size); + PTR_FAIL_WITH_EXEC_IF(code); + buf = compiler->buf; + + code_ptr = code; + label = compiler->labels; + jump = compiler->jumps; + const_ = compiler->consts; + do { + buf_ptr = buf->memory; + buf_end = buf_ptr + buf->used_size; + do { + len = *buf_ptr++; + if (len > 0) { + /* The code is already generated. */ + SLJIT_MEMMOVE(code_ptr, buf_ptr, len); + code_ptr += len; + buf_ptr += len; + } + else { + if (*buf_ptr >= 4) { + jump->addr = (sljit_uw)code_ptr; + if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) + code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); + else + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); + jump = jump->next; + } + else if (*buf_ptr == 0) { + label->addr = (sljit_uw)code_ptr; + label->size = code_ptr - code; + label = label->next; + } + else if (*buf_ptr == 1) { + const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); + const_ = const_->next; + } + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; + buf_ptr++; + *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); + code_ptr += sizeof(sljit_sw); + buf_ptr += sizeof(sljit_sw) - 1; +#else + code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); + buf_ptr += sizeof(sljit_sw); +#endif + } + buf_ptr++; + } + } while (buf_ptr < buf_end); + SLJIT_ASSERT(buf_ptr == buf_end); + buf = buf->next; + } while (buf); + + SLJIT_ASSERT(!label); + SLJIT_ASSERT(!jump); + SLJIT_ASSERT(!const_); + + jump = compiler->jumps; + while (jump) { + if (jump->flags & PATCH_MB) { + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127); + *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))); + } else if (jump->flags & PATCH_MW) { + if (jump->flags & JUMP_LABEL) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))); +#else + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX); + *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))); +#endif + } + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))); +#else + SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX); + *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si))); +#endif + } + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + else if (jump->flags & PATCH_MD) + *(sljit_sw*)jump->addr = jump->u.label->addr; +#endif + + jump = jump->next; + } + + /* Maybe we waste some space because of short jumps. */ + SLJIT_ASSERT(code_ptr <= code + compiler->size); + compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_size = code_ptr - code; + return (void*)code; +} + +/* --------------------------------------------------------------------- */ +/* Operators */ +/* --------------------------------------------------------------------- */ + +static sljit_si emit_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w); + +static sljit_si emit_mov(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw); + +static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler) +{ + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); +#else + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); + INC_SIZE(6); + *inst++ = REX_W; +#endif + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)sizeof(sljit_sw); + *inst++ = PUSHF; + compiler->flags_saved = 1; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags) +{ + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + *inst++ = POPF; +#else + inst = (sljit_ub*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); + INC_SIZE(6); + *inst++ = POPF; + *inst++ = REX_W; +#endif + *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ + *inst++ = 0x64; + *inst++ = 0x24; + *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw); + compiler->flags_saved = keep_flags; + return SLJIT_SUCCESS; +} + +#ifdef _WIN32 +#include + +static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size) +{ + /* Workaround for calling the internal _chkstk() function on Windows. + This function touches all 4k pages belongs to the requested stack space, + which size is passed in local_size. This is necessary on Windows where + the stack can only grow in 4k steps. However, this function just burn + CPU cycles if the stack is large enough. However, you don't know it in + advance, so it must always be called. I think this is a bad design in + general even if it has some reasons. */ + *(volatile sljit_si*)alloca(local_size) = 0; +} + +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#include "sljitNativeX86_32.c" +#else +#include "sljitNativeX86_64.c" +#endif + +static sljit_si emit_mov(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + + if (dst == SLJIT_UNUSED) { + /* No destination, doesn't need to setup flags. */ + if (src & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + } + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(src)) { + inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + if (!compiler->mode32) { + if (NOT_HALFWORD(srcw)) + return emit_load_imm64(compiler, dst, srcw); + } + else + return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw); +#endif + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (!compiler->mode32 && NOT_HALFWORD(srcw)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; + } +#endif + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + return SLJIT_SUCCESS; + } + + /* Memory to memory move. Requires two instruction. */ + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst = MOV_r_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + return SLJIT_SUCCESS; +} + +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) +{ + sljit_ub *inst; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_si size; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op0(compiler, op)); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = INT3; + break; + case SLJIT_NOP: + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = NOP; + break; + case SLJIT_LUMUL: + case SLJIT_LSMUL: + case SLJIT_LUDIV: + case SLJIT_LSDIV: + compiler->flags_saved = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] == 2 + && reg_map[TMP_REG1] > 7, + invalid_register_assignment_for_div_mul); +#else + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_R0] == 0 + && reg_map[SLJIT_R1] < 7 + && reg_map[TMP_REG1] == 2, + invalid_register_assignment_for_div_mul); +#endif + compiler->mode32 = op & SLJIT_INT_OP; +#endif + + op = GET_OPCODE(op); + if (op == SLJIT_LUDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); +#else + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); +#endif + FAIL_IF(!inst); + *inst = XOR_r_rm; + } + + if (op == SLJIT_LSDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); +#endif + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; +#else + if (compiler->mode32) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = CDQ; + } else { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = REX_W; + *inst = CDQ; + } +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); +#else +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2; +#else + size = (!compiler->mode32) ? 3 : 2; +#endif + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0); + else if (op >= SLJIT_LUDIV) + *inst++ = REX_B; + *inst++ = GROUP_F7; + *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); +#else + if (!compiler->mode32) + *inst++ = REX_W; + *inst++ = GROUP_F7; + *inst = MOD_REG | reg_map[SLJIT_R1]; +#endif +#endif + switch (op) { + case SLJIT_LUMUL: + *inst |= MUL; + break; + case SLJIT_LSMUL: + *inst |= IMUL; + break; + case SLJIT_LUDIV: + *inst |= DIV; + break; + case SLJIT_LSDIV: + *inst |= IDIV; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#endif + break; + } + + return SLJIT_SUCCESS; +} + +#define ENCODE_PREFIX(prefix) \ + do { \ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ + FAIL_IF(!inst); \ + INC_SIZE(1); \ + *inst = (prefix); \ + } while (0) + +static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + sljit_si dst_r; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_si work_r; +#endif + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_i8; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg_map[src] >= 4) { + SLJIT_ASSERT(dst_r == TMP_REG1); + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + } else + dst_r = src; +#else + dst_r = src; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else if (FAST_IS_REG(src) && reg_map[src] >= 4) { + /* src, dst are registers. */ + SLJIT_ASSERT(SLOW_IS_REG(dst)); + if (reg_map[dst] < 4) { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + else { + if (dst != src) + EMIT_MOV(compiler, dst, 0, src, 0); + if (sign) { + /* shl reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SHL; + /* sar reg, 24 */ + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0); + FAIL_IF(!inst); + *inst |= SAR; + } + else { + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0); + FAIL_IF(!inst); + *(inst + 1) |= AND; + } + } + return SLJIT_SUCCESS; + } +#endif + else { + /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */ + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8; + } + + if (dst & SLJIT_MEM) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst_r == TMP_REG1) { + /* Find a non-used register, whose reg_map[src] < 4. */ + if ((dst & REG_MASK) == SLJIT_R0) { + if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1)) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + else { + if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + work_r = SLJIT_R0; + else if ((dst & REG_MASK) == SLJIT_R1) + work_r = SLJIT_R2; + else + work_r = SLJIT_R1; + } + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + + if (work_r == SLJIT_R0) { + ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]); + } + else { + inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } + } + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; + } +#else + inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm8_r8; +#endif + } + + return SLJIT_SUCCESS; +} + +static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + sljit_si dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) + return SLJIT_SUCCESS; /* Empty instruction. */ + + if (src & SLJIT_IMM) { + if (FAST_IS_REG(dst)) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw); +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; +#endif + } + inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) + dst_r = src; + else { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16; + } + + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_r; + } + + return SLJIT_SUCCESS; +} + +static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + if (dst == src && dstw == srcw) { + /* Same input and output */ + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + return SLJIT_SUCCESS; + } + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= opcode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static sljit_si emit_not_with_flags(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + return SLJIT_SUCCESS; + } + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = OR_r_rm; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + sljit_si dst_r; + + SLJIT_UNUSED_ARG(op_flags); + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { + /* Just set the zero flag. */ + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_F7; + *inst |= NOT_rm; +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0); +#else + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0); +#endif + FAIL_IF(!inst); + *inst |= SHR; + return SLJIT_SUCCESS; + } + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = BSR_r_rm; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (FAST_IS_REG(dst)) + dst_r = dst; + else { + /* Find an unused temporary register. */ + if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) + dst_r = SLJIT_R0; + else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) + dst_r = SLJIT_R1; + else + dst_r = SLJIT_R2; + EMIT_MOV(compiler, dst, dstw, dst_r, 0); + } + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); +#else + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + compiler->mode32 = 0; + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31); + compiler->mode32 = op_flags & SLJIT_INT_OP; +#endif + + if (cpu_has_cmov == -1) + get_cpu_features(); + + if (cpu_has_cmov) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVNE_r_rm; + } else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + + *inst++ = JE_i8; + *inst++ = 2; + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1]; +#else + inst = (sljit_ub*)ensure_buf(compiler, 1 + 5); + FAIL_IF(!inst); + INC_SIZE(5); + + *inst++ = JE_i8; + *inst++ = 3; + *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0); + *inst++ = MOV_r_rm; + *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1]; +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); +#else + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0); +#endif + FAIL_IF(!inst); + *(inst + 1) |= XOR; + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (dst & SLJIT_MEM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + } +#else + if (dst & SLJIT_MEM) + EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); +#endif + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_ub* inst; + sljit_si update = 0; + sljit_si op_flags = GET_ALL_FLAGS(op); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + sljit_si dst_is_ereg = 0; + sljit_si src_is_ereg = 0; +#else +# define src_is_ereg 0 +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1); + CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op_flags & SLJIT_INT_OP; +#endif + + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + if (op_flags & SLJIT_INT_OP) { + if (FAST_IS_REG(src) && src == dst) { + if (!TYPE_CAST_NEEDED(op)) + return SLJIT_SUCCESS; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (op == SLJIT_MOV_SI && (src & SLJIT_MEM)) + op = SLJIT_MOV_UI; + if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM)) + op = SLJIT_MOVU_UI; + if (op == SLJIT_MOV_UI && (src & SLJIT_IMM)) + op = SLJIT_MOV_SI; + if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM)) + op = SLJIT_MOVU_SI; +#endif + } + + SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset); + if (op >= SLJIT_MOVU) { + update = 1; + op -= 8; + } + + if (src & SLJIT_IMM) { + switch (op) { + case SLJIT_MOV_UB: + srcw = (sljit_ub)srcw; + break; + case SLJIT_MOV_SB: + srcw = (sljit_sb)srcw; + break; + case SLJIT_MOV_UH: + srcw = (sljit_uh)srcw; + break; + case SLJIT_MOV_SH: + srcw = (sljit_sh)srcw; + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_UI: + srcw = (sljit_ui)srcw; + break; + case SLJIT_MOV_SI: + srcw = (sljit_si)srcw; + break; +#endif + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg)) + return emit_mov(compiler, dst, dstw, src, srcw); +#endif + } + + if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) { + inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw); + FAIL_IF(!inst); + *inst = LEA_r_m; + src &= SLJIT_MEM | 0xf; + srcw = 0; + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { + SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); + dst = TMP_REG1; + } +#endif + + switch (op) { + case SLJIT_MOV: + case SLJIT_MOV_P: +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_MOV_UI: + case SLJIT_MOV_SI: +#endif + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_UB: + FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_SB: + FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_UH: + FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_SH: + FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw)); + break; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + case SLJIT_MOV_UI: + FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw)); + break; + case SLJIT_MOV_SI: + FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw)); + break; +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1) + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); +#endif + + if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { + inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw); + FAIL_IF(!inst); + *inst = LEA_r_m; + } + return SLJIT_SUCCESS; + } + + if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) + compiler->flags_saved = 0; + + switch (op) { + case SLJIT_NOT: + if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) + return emit_not_with_flags(compiler, dst, dstw, src, srcw); + return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); + + case SLJIT_NEG: + if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); + + case SLJIT_CLZ: + if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + return emit_clz(compiler, op_flags, dst, dstw, src, srcw); + } + + return SLJIT_SUCCESS; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +# undef src_is_ereg +#endif +} + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + if (IS_HALFWORD(immw) || compiler->mode32) { \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); \ + } \ + else { \ + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ + FAIL_IF(!inst); \ + *inst = (op_mr); \ + } + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw)) + +#else + +#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \ + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \ + FAIL_IF(!inst); \ + *(inst + 1) |= (op_imm); + +#define BINARY_EAX_IMM(op_eax_imm, immw) \ + FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw)) + +#endif + +static sljit_si emit_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + /* Special exception for sljit_emit_op_flags. */ + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* Only for cumulative operations. */ + if (dst == src2 && dstw == src2w) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src1w); + } + else { + BINARY_IMM(op_imm, op_mr, src1w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src1)) { + inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler, + sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + return SLJIT_SUCCESS; + } + + if (dst == src1 && dstw == src1w) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(op_eax_imm, src2w); + } + else { + BINARY_IMM(op_imm, op_mr, src2w, dst, dstw); + } + } + else if (FAST_IS_REG(dst)) { + inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + else if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); + FAIL_IF(!inst); + *inst = op_mr; + } + return SLJIT_SUCCESS; + } + + /* General version. */ + if (FAST_IS_REG(dst) && dst != src2) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, dst, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + } + else { + /* This version requires less memory writing. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { + BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = op_rm; + } + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_si emit_mul(struct sljit_compiler *compiler, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + sljit_si dst_r; + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + /* Register destination. */ + if (dst_r == src1 && !(src2 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (dst_r == src2 && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + else if (src1 & SLJIT_IMM) { + if (src2 & SLJIT_IMM) { + EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w); + src2 = dst_r; + src2w = 0; + } + + if (src1w <= 127 && src1w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src1w; + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src1w; + } +#else + else if (IS_HALFWORD(src1w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src1w; + } + else { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); + if (dst_r != src2) + EMIT_MOV(compiler, dst_r, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else if (src2 & SLJIT_IMM) { + /* Note: src1 is NOT immediate. */ + + if (src2w <= 127 && src2w >= -128) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i8; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!inst); + INC_SIZE(1); + *inst = (sljit_sb)src2w; + } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + else { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_sw*)inst = src2w; + } +#else + else if (IS_HALFWORD(src2w)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w); + FAIL_IF(!inst); + *inst = IMUL_r_rm_i32; + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *(sljit_si*)inst = (sljit_si)src2w; + } + else { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); + if (dst_r != src1) + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } +#endif + } + else { + /* Neither argument is immediate. */ + if (ADDRESSING_DEPENDS_ON(src2, dst_r)) + dst_r = TMP_REG1; + EMIT_MOV(compiler, dst_r, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = IMUL_r_rm; + } + + if (dst_r == TMP_REG1) + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + + return SLJIT_SUCCESS; +} + +static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + sljit_si dst_r, done = 0; + + /* These cases better be left to handled by normal way. */ + if (!keep_flags) { + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; + } + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + if (FAST_IS_REG(src1)) { + if (FAST_IS_REG(src2)) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0); + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w); +#else + if (src2 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + else if (FAST_IS_REG(src2)) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w); +#else + if (src1 & SLJIT_IMM) { + inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w); +#endif + FAIL_IF(!inst); + *inst = LEA_r_m; + done = 1; + } + } + + if (done) { + if (dst_r == TMP_REG1) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + return SLJIT_ERR_UNSUPPORTED; +} + +static sljit_si emit_cmp_binary(struct sljit_compiler *compiler, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(CMP_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src1)) { + if (src2 & SLJIT_IMM) { + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0); + } + else { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = CMP_rm_r; + return SLJIT_SUCCESS; + } + + if (src2 & SLJIT_IMM) { + if (src1 & SLJIT_IMM) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + src1 = TMP_REG1; + src1w = 0; + } + BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w); + } + else { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = CMP_r_rm; + } + return SLJIT_SUCCESS; +} + +static sljit_si emit_test_binary(struct sljit_compiler *compiler, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) { +#else + if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src2w); + return SLJIT_SUCCESS; + } + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { +#else + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { +#endif + BINARY_EAX_IMM(TEST_EAX_i32, src1w); + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src1)) { + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; + } + + if (FAST_IS_REG(src2)) { + if (src1 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src1w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (IS_HALFWORD(src2w) || compiler->mode32) { + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; + } + else { + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } +#else + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst = GROUP_F7; +#endif + } + else { + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); + FAIL_IF(!inst); + *inst = TEST_rm_r; + } + return SLJIT_SUCCESS; +} + +static sljit_si emit_shift(struct sljit_compiler *compiler, + sljit_ub mode, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_ub* inst; + + if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) { + if (dst == src1 && dstw == src1w) { + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_UNUSED) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + if (FAST_IS_REG(dst)) { + EMIT_MOV(compiler, dst, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0); + FAIL_IF(!inst); + *inst |= mode; + return SLJIT_SUCCESS; + } + + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; + } + + if (dst == SLJIT_PREF_SHIFT_REG) { + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) { + if (src1 != dst) + EMIT_MOV(compiler, dst, 0, src1, src1w); + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + } + else { + /* This case is really difficult, since ecx itself may used for + addressing, and we must ensure to work even in that case. */ + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); +#else + /* [esp+0] contains the flags. */ + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); +#endif + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); +#else + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); +#endif + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); + } + + return SLJIT_SUCCESS; +} + +static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_ub mode, sljit_si set_flags, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + /* The CPU does not set flags if the shift count is 0. */ + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#else + if ((src2w & 0x1f) != 0) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#endif + if (!set_flags) + return emit_mov(compiler, dst, dstw, src1, src1w); + /* OR dst, src, 0 */ + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, + dst, dstw, src1, src1w, SLJIT_IMM, 0); + } + + if (!set_flags) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!FAST_IS_REG(dst)) + FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); + + FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); + + if (FAST_IS_REG(dst)) + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + CHECK_EXTRA_REGS(src1, src1w, (void)0); + CHECK_EXTRA_REGS(src2, src2w, (void)0); +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = op & SLJIT_INT_OP; +#endif + + if (GET_OPCODE(op) >= SLJIT_MUL) { + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_ADD: + if (!GET_FLAGS(op)) { + if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + else + compiler->flags_saved = 0; + if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ADDC: + if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ + FAIL_IF(emit_restore_flags(compiler, 1)); + else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) + FAIL_IF(emit_save_flags(compiler)); + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUB: + if (!GET_FLAGS(op)) { + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + return compiler->error; + } + else + compiler->flags_saved = 0; + if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) + FAIL_IF(emit_save_flags(compiler)); + if (dst == SLJIT_UNUSED) + return emit_cmp_binary(compiler, src1, src1w, src2, src2w); + return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SUBC: + if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ + FAIL_IF(emit_restore_flags(compiler, 1)); + else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) + FAIL_IF(emit_save_flags(compiler)); + if (SLJIT_UNLIKELY(GET_FLAGS(op))) + compiler->flags_saved = 0; + return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_MUL: + return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w); + case SLJIT_AND: + if (dst == SLJIT_UNUSED) + return emit_test_binary(compiler, src1, src1w, src2, src2w); + return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_OR: + return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_XOR: + return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_SHL: + return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_LSHR: + return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + case SLJIT_ASHR: + return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), + dst, dstw, src1, src1w, src2, src2w); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_register_index(reg)); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg >= SLJIT_R3 && reg <= SLJIT_R6) + return -1; +#endif + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg) +{ + CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, sljit_si size) +{ + sljit_ub *inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_custom(compiler, instruction, size)); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + SLJIT_MEMMOVE(inst, instruction, size); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Floating point operators */ +/* --------------------------------------------------------------------- */ + +/* Alignment + 2 * 16 bytes. */ +static sljit_si sse2_data[3 + (4 + 4) * 2]; +static sljit_si *sse2_buffer; + +static void init_compiler(void) +{ + sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf); + /* Single precision constants. */ + sse2_buffer[0] = 0x80000000; + sse2_buffer[4] = 0x7fffffff; + /* Double precision constants. */ + sse2_buffer[8] = 0; + sse2_buffer[9] = 0x80000000; + sse2_buffer[12] = 0xffffffff; + sse2_buffer[13] = 0x7fffffff; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void) +{ +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else /* SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ +} + +static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) +{ + sljit_ub *inst; + + inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode, + sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w) +{ + sljit_ub *inst; + + inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = opcode; + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler, + sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw) +{ + return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw); +} + +static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler, + sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src) +{ + return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw); +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVW_FROMD) + compiler->mode32 = 0; +#endif + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTTSD2SI_r_xm; + + if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_ub *inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMW) + compiler->mode32 = 0; +#endif + + if (src & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) + srcw = (sljit_si)srcw; +#endif + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CVTSI2SD_x_rm; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + compiler->flags_saved = 0; + if (!FAST_IS_REG(src1)) { + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + src1 = TMP_FREG; + } + return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw) +{ + sljit_si dst_r; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + CHECK_ERROR(); + SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); + + if (GET_OPCODE(op) == SLJIT_DMOV) { + if (FAST_IS_REG(dst)) + return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw); + if (FAST_IS_REG(src)) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src); + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw)); + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + } + + if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) { + dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + if (FAST_IS_REG(src)) { + /* We overwrite the high bits of source. From SLJIT point of view, + this is not an issue. + Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */ + FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0)); + } + else { + FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw)); + src = TMP_FREG; + } + + FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0)); + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (SLOW_IS_REG(dst)) { + dst_r = dst; + if (dst != src) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_DNEG: + FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8))); + break; + + case SLJIT_DABS: + FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12))); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src1, sljit_sw src1w, + sljit_si src2, sljit_sw src2w) +{ + sljit_si dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); + ADJUST_LOCAL_OFFSET(dst, dstw); + ADJUST_LOCAL_OFFSET(src1, src1w); + ADJUST_LOCAL_OFFSET(src2, src2w); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + if (FAST_IS_REG(dst)) { + dst_r = dst; + if (dst == src1) + ; /* Do nothing here. */ + else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) { + /* Swap arguments. */ + src2 = src1; + src2w = src1w; + } + else if (dst != src2) + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w)); + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + } + } + else { + dst_r = TMP_FREG; + FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w)); + } + + switch (GET_OPCODE(op)) { + case SLJIT_DADD: + FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + break; + + case SLJIT_DSUB: + FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + break; + + case SLJIT_DMUL: + FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + break; + + case SLJIT_DDIV: + FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w)); + break; + } + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +/* --------------------------------------------------------------------- */ +/* Conditional instructions */ +/* --------------------------------------------------------------------- */ + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) +{ + sljit_ub *inst; + struct sljit_label *label; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_label(compiler)); + + /* We should restore the flags before the label, + since other taken jumps has their own flags as well. */ + if (SLJIT_UNLIKELY(compiler->flags_saved)) + PTR_FAIL_IF(emit_restore_flags(compiler, 0)); + + if (compiler->last_label && compiler->last_label->size == compiler->size) + return compiler->last_label; + + label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label)); + PTR_FAIL_IF(!label); + set_label(label, compiler); + + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 0; + + return label; +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type) +{ + sljit_ub *inst; + struct sljit_jump *jump; + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_jump(compiler, type)); + + if (SLJIT_UNLIKELY(compiler->flags_saved)) { + if ((type & 0xff) <= SLJIT_JUMP) + PTR_FAIL_IF(emit_restore_flags(compiler, 0)); + compiler->flags_saved = 0; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF_NULL(jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + type &= 0xff; + + if (type >= SLJIT_CALL1) + PTR_FAIL_IF(call_with_args(compiler, type)); + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += (type >= SLJIT_JUMP) ? 5 : 6; +#else + compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3); +#endif + + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = type + 4; + return jump; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw) +{ + sljit_ub *inst; + struct sljit_jump *jump; + + CHECK_ERROR(); + CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); + ADJUST_LOCAL_OFFSET(src, srcw); + + CHECK_EXTRA_REGS(src, srcw, (void)0); + + if (SLJIT_UNLIKELY(compiler->flags_saved)) { + if (type <= SLJIT_JUMP) + FAIL_IF(emit_restore_flags(compiler, 0)); + compiler->flags_saved = 0; + } + + if (type >= SLJIT_CALL1) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (src == SLJIT_R2) { + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + src = TMP_REG1; + } + if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) + srcw += sizeof(sljit_sw); +#endif +#endif +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) + if (src == SLJIT_R2) { + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + src = TMP_REG1; + } +#endif + FAIL_IF(call_with_args(compiler, type)); + } + + if (src == SLJIT_IMM) { + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + FAIL_IF_NULL(jump); + set_jump(jump, compiler, JUMP_ADDR); + jump->u.target = srcw; + + /* Worst case size. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + compiler->size += 5; +#else + compiler->size += 10 + 3; +#endif + + inst = (sljit_ub*)ensure_buf(compiler, 2); + FAIL_IF_NULL(inst); + + *inst++ = 0; + *inst++ = type + 4; + } + else { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + /* REX_W is not necessary (src is not immediate). */ + compiler->mode32 = 1; +#endif + inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_FF; + *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm; + } + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op, + sljit_si dst, sljit_sw dstw, + sljit_si src, sljit_sw srcw, + sljit_si type) +{ + sljit_ub *inst; + sljit_ub cond_set = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_si reg; +#else + /* CHECK_EXTRA_REGS migh overwrite these values. */ + sljit_si dst_save = dst; + sljit_sw dstw_save = dstw; +#endif + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + SLJIT_UNUSED_ARG(srcw); + + if (dst == SLJIT_UNUSED) + return SLJIT_SUCCESS; + + ADJUST_LOCAL_OFFSET(dst, dstw); + CHECK_EXTRA_REGS(dst, dstw, (void)0); + if (SLJIT_UNLIKELY(compiler->flags_saved)) + FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); + + type &= 0xff; + /* setcc = jcc + 0x10. */ + cond_set = get_jump_code(type) + 0x10; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3); + FAIL_IF(!inst); + INC_SIZE(4 + 3); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[TMP_REG1]; + *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B); + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]; + return SLJIT_SUCCESS; + } + + reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1; + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4); + FAIL_IF(!inst); + INC_SIZE(4 + 4); + /* Set low register to conditional flag. */ + *inst++ = (reg_map[reg] <= 7) ? REX : REX_B; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_lmap[reg]; + *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; + + if (reg != TMP_REG1) + return SLJIT_SUCCESS; + + if (GET_OPCODE(op) < SLJIT_ADD) { + compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + } +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0); +#else /* SLJIT_CONFIG_X86_64 */ + if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { + if (reg_map[dst] <= 4) { + /* Low byte is accessible. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3); + FAIL_IF(!inst); + INC_SIZE(3 + 3); + /* Set low byte to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | reg_map[dst]; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst]; + return SLJIT_SUCCESS; + } + + /* Low byte is not accessible. */ + if (cpu_has_cmov == -1) + get_cpu_features(); + + if (cpu_has_cmov) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1); + /* a xor reg, reg operation would overwrite the flags. */ + EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0); + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 3); + FAIL_IF(!inst); + INC_SIZE(3); + + *inst++ = GROUP_0F; + /* cmovcc = setcc - 0x50. */ + *inst++ = cond_set - 0x50; + *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]; + return SLJIT_SUCCESS; + } + + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */; + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + return SLJIT_SUCCESS; + } + + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); + if (dst != SLJIT_R0) { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 2 + 1); + /* Set low register to conditional flag. */ + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst]; + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + } + else { + inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2); + FAIL_IF(!inst); + INC_SIZE(2 + 3 + 2 + 2); + /* Set low register to conditional flag. */ + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 1 /* ecx */; + *inst++ = OR_rm8_r8; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */; + *inst++ = XCHG_r_rm; + *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]; + } + return SLJIT_SUCCESS; + } + + /* Set TMP_REG1 to the bit. */ + inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1); + FAIL_IF(!inst); + INC_SIZE(1 + 3 + 3 + 1); + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + /* Set al to conditional flag. */ + *inst++ = GROUP_0F; + *inst++ = cond_set; + *inst++ = MOD_REG | 0 /* eax */; + + *inst++ = GROUP_0F; + *inst++ = MOVZX_r_rm8; + *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */; + + *inst++ = XCHG_EAX_r + reg_map[TMP_REG1]; + + if (GET_OPCODE(op) < SLJIT_ADD) + return emit_mov(compiler, dst, dstw, TMP_REG1, 0); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); +#endif /* SLJIT_CONFIG_X86_64 */ +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) +{ + CHECK_ERROR(); + CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; +#endif + + ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (NOT_HALFWORD(offset)) { + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + return compiler->error; +#else + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); +#endif + } +#endif + + if (offset != 0) + return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) +{ + sljit_ub *inst; + struct sljit_const *const_; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + sljit_si reg; +#endif + + CHECK_ERROR_PTR(); + CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + CHECK_EXTRA_REGS(dst, dstw, (void)0); + + const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const)); + PTR_FAIL_IF(!const_); + set_const(const_, compiler); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 0; + reg = SLOW_IS_REG(dst) ? dst : TMP_REG1; + + if (emit_load_imm64(compiler, reg, init_value)) + return NULL; +#else + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + + if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) + return NULL; +#endif + + inst = (sljit_ub*)ensure_buf(compiler, 2); + PTR_FAIL_IF(!inst); + + *inst++ = 0; + *inst++ = 1; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if (dst & SLJIT_MEM) + if (emit_mov(compiler, dst, dstw, TMP_REG1, 0)) + return NULL; +#endif + + return const_; +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +{ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + *(sljit_sw*)addr = new_addr - (addr + 4); +#else + *(sljit_uw*)addr = new_addr; +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +{ + *(sljit_sw*)addr = new_constant; +} diff --git a/src/sljit/sljitUtils.c b/src/sljit/sljitUtils.c new file mode 100644 index 0000000..3f12bb7 --- /dev/null +++ b/src/sljit/sljitUtils.c @@ -0,0 +1,332 @@ +/* + * Stack-less Just-In-Time compiler + * + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* ------------------------------------------------------------------------ */ +/* Locks */ +/* ------------------------------------------------------------------------ */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) || (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) + +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + /* Always successful. */ +} + +static SLJIT_INLINE void allocator_release_lock(void) +{ + /* Always successful. */ +} + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +{ + /* Always successful. */ +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +{ + /* Always successful. */ +} + +#endif /* SLJIT_UTIL_GLOBAL_LOCK */ + +#elif defined(_WIN32) /* SLJIT_SINGLE_THREADED */ + +#include "windows.h" + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +static HANDLE allocator_mutex = 0; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + /* No idea what to do if an error occures. Static mutexes should never fail... */ + if (!allocator_mutex) + allocator_mutex = CreateMutex(NULL, TRUE, NULL); + else + WaitForSingleObject(allocator_mutex, INFINITE); +} + +static SLJIT_INLINE void allocator_release_lock(void) +{ + ReleaseMutex(allocator_mutex); +} + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) + +static HANDLE global_mutex = 0; + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +{ + /* No idea what to do if an error occures. Static mutexes should never fail... */ + if (!global_mutex) + global_mutex = CreateMutex(NULL, TRUE, NULL); + else + WaitForSingleObject(global_mutex, INFINITE); +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +{ + ReleaseMutex(global_mutex); +} + +#endif /* SLJIT_UTIL_GLOBAL_LOCK */ + +#else /* _WIN32 */ + +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#include + +static pthread_mutex_t allocator_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE void allocator_grab_lock(void) +{ + pthread_mutex_lock(&allocator_mutex); +} + +static SLJIT_INLINE void allocator_release_lock(void) +{ + pthread_mutex_unlock(&allocator_mutex); +} + +#endif /* SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) + +#include + +static pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER; + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_grab_lock(void) +{ + pthread_mutex_lock(&global_mutex); +} + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void) +{ + pthread_mutex_unlock(&global_mutex); +} + +#endif /* SLJIT_UTIL_GLOBAL_LOCK */ + +#endif /* _WIN32 */ + +/* ------------------------------------------------------------------------ */ +/* Stack */ +/* ------------------------------------------------------------------------ */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) || (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#ifdef _WIN32 +#include "windows.h" +#else +/* Provides mmap function. */ +#include +/* For detecting the page size. */ +#include + +#ifndef MAP_ANON + +#include + +/* Some old systems does not have MAP_ANON. */ +static sljit_si dev_zero = -1; + +#if (defined SLJIT_SINGLE_THREADED && SLJIT_SINGLE_THREADED) + +static SLJIT_INLINE sljit_si open_dev_zero(void) +{ + dev_zero = open("/dev/zero", O_RDWR); + return dev_zero < 0; +} + +#else /* SLJIT_SINGLE_THREADED */ + +#include + +static pthread_mutex_t dev_zero_mutex = PTHREAD_MUTEX_INITIALIZER; + +static SLJIT_INLINE sljit_si open_dev_zero(void) +{ + pthread_mutex_lock(&dev_zero_mutex); + dev_zero = open("/dev/zero", O_RDWR); + pthread_mutex_unlock(&dev_zero_mutex); + return dev_zero < 0; +} + +#endif /* SLJIT_SINGLE_THREADED */ + +#endif + +#endif + +#endif /* SLJIT_UTIL_STACK || SLJIT_EXECUTABLE_ALLOCATOR */ + +#if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) + +/* Planning to make it even more clever in the future. */ +static sljit_sw sljit_page_align = 0; + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) +{ + struct sljit_stack *stack; + union { + void *ptr; + sljit_uw uw; + } base; +#ifdef _WIN32 + SYSTEM_INFO si; +#endif + + if (limit > max_limit || limit < 1) + return NULL; + +#ifdef _WIN32 + if (!sljit_page_align) { + GetSystemInfo(&si); + sljit_page_align = si.dwPageSize - 1; + } +#else + if (!sljit_page_align) { + sljit_page_align = sysconf(_SC_PAGESIZE); + /* Should never happen. */ + if (sljit_page_align < 0) + sljit_page_align = 4096; + sljit_page_align--; + } +#endif + + /* Align limit and max_limit. */ + max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; + + stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); + if (!stack) + return NULL; + +#ifdef _WIN32 + base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); + if (!base.ptr) { + SLJIT_FREE(stack); + return NULL; + } + stack->base = base.uw; + stack->limit = stack->base; + stack->max_limit = stack->base + max_limit; + if (sljit_stack_resize(stack, stack->base + limit)) { + sljit_free_stack(stack); + return NULL; + } +#else +#ifdef MAP_ANON + base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); +#else + if (dev_zero < 0) { + if (open_dev_zero()) { + SLJIT_FREE(stack); + return NULL; + } + } + base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); +#endif + if (base.ptr == MAP_FAILED) { + SLJIT_FREE(stack, allocator_data); + return NULL; + } + stack->base = base.uw; + stack->limit = stack->base + limit; + stack->max_limit = stack->base + max_limit; +#endif + stack->top = stack->base; + return stack; +} + +#undef PAGE_ALIGN + +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data) +{ +#ifdef _WIN32 + VirtualFree((void*)stack->base, 0, MEM_RELEASE); +#else + munmap((void*)stack->base, stack->max_limit - stack->base); +#endif + SLJIT_FREE(stack, allocator_data); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit) +{ + sljit_uw aligned_old_limit; + sljit_uw aligned_new_limit; + + if ((new_limit > stack->max_limit) || (new_limit < stack->base)) + return -1; +#ifdef _WIN32 + aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; + aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; + if (aligned_new_limit != aligned_old_limit) { + if (aligned_new_limit > aligned_old_limit) { + if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE)) + return -1; + } + else { + if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT)) + return -1; + } + } + stack->limit = new_limit; + return 0; +#else + if (new_limit >= stack->limit) { + stack->limit = new_limit; + return 0; + } + aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; + aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; + /* If madvise is available, we release the unnecessary space. */ +#if defined(MADV_DONTNEED) + if (aligned_new_limit < aligned_old_limit) + madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED); +#elif defined(POSIX_MADV_DONTNEED) + if (aligned_new_limit < aligned_old_limit) + posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED); +#endif + stack->limit = new_limit; + return 0; +#endif +} + +#endif /* SLJIT_UTIL_STACK */ + +#endif