Fix unaligned accesses on x86. Patch by Marc Mutz.

Zoltán Herczeg 2016-07-15 10:02:19 +00:00
parent 22c4999971
commit 063f45de11
5 changed files with 71 additions and 37 deletions
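The change replaces type-punned stores into the generated-code buffer with small memcpy-based helpers. Writing an immediate through a cast pointer, as the old code did, performs a potentially misaligned store that is undefined behaviour in C even though x86 hardware tolerates it, and it is the kind of access that alignment sanitizers flag. A minimal, self-contained sketch of the technique (not the patch code itself; the real helpers added below are built on sljit's SLJIT_MEMCPY and SLJIT_INLINE macros):

#include <stdint.h>
#include <string.h>

/* Sketch of the memcpy-based unaligned store introduced by this patch.
   A fixed-size memcpy like this is folded into a single 32-bit mov by
   optimizing compilers, so the well-defined form costs nothing on x86. */
static inline void unaligned_store_s32(void *addr, int32_t value)
{
	memcpy(addr, &value, sizeof(value));
}

static void emit_imm32(uint8_t *buf_ptr, int32_t imm)
{
	/* Old pattern: buf_ptr points into a byte-oriented code buffer with no
	   alignment guarantee, so the cast-and-assign below is a misaligned
	   store and undefined behaviour:
	       *(int32_t *)buf_ptr = imm;                                      */
	unaligned_store_s32(buf_ptr, imm);	/* well-defined replacement */
}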

View File

@@ -172,6 +172,8 @@ large stack size when testing with clang.
 44. In pcre2test, ignore "allcaptures" after DFA matching.
 
+45. Fix unaligned accesses on x86. Patch by Marc Mutz.
+
 Version 10.21 12-January-2016
 -----------------------------

View File

@@ -214,6 +214,10 @@
 #define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
 #endif
 
+#ifndef SLJIT_MEMCPY
+#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
+#endif
+
 #ifndef SLJIT_ZEROMEM
 #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
 #endif

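Like the existing SLJIT_MEMMOVE and SLJIT_ZEROMEM hooks around it, the new SLJIT_MEMCPY macro sits behind an #ifndef guard, so an embedding project can supply its own copy routine by defining the macro before this configuration header is processed. A hypothetical override (my_memcpy is a placeholder, not part of the patch):

/* Hypothetical embedder override, defined before sljit's configuration
   header is read: the #ifndef guard above then keeps this definition and
   the memcpy() fallback is never installed. my_memcpy is a placeholder
   for whatever copy routine the embedding project provides. */
#define SLJIT_MEMCPY(dest, src, len) my_memcpy((dest), (src), (len))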
View File

@@ -34,7 +34,7 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s
 	FAIL_IF(!inst);
 	INC_SIZE(1 + sizeof(sljit_sw));
 	*inst++ = opcode;
-	*(sljit_sw*)inst = imm;
+	sljit_unaligned_store_sw(inst, imm);
 	return SLJIT_SUCCESS;
 }
@@ -57,7 +57,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
 	if (jump->flags & JUMP_LABEL)
 		jump->flags |= PATCH_MW;
 	else
-		*(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4);
+		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4));
 	code_ptr += 4;
 	return code_ptr;
@@ -151,12 +151,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP];
 		inst[2] = GROUP_F7;
 		inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP];
-		*(sljit_sw*)(inst + 4) = 0x4;
+		sljit_unaligned_store_sw(inst + 4, 0x4);
 		inst[8] = JNE_i8;
 		inst[9] = 6;
 		inst[10] = GROUP_BINARY_81;
 		inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP];
-		*(sljit_sw*)(inst + 12) = 0x4;
+		sljit_unaligned_store_sw(inst + 12, 0x4);
 		inst[16] = PUSH_r + reg_map[TMP_REG1];
 	}
 	else
@@ -406,7 +406,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 			if (immb <= 127 && immb >= -128)
 				*buf_ptr++ = immb; /* 8 bit displacement. */
 			else {
-				*(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+				sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
 				buf_ptr += sizeof(sljit_sw);
 			}
 		}
@@ -418,7 +418,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 		}
 		else {
 			*buf_ptr++ |= 0x05;
-			*(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */
+			sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
 			buf_ptr += sizeof(sljit_sw);
 		}
@@ -426,9 +426,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 		if (flags & EX86_BYTE_ARG)
 			*buf_ptr = imma;
 		else if (flags & EX86_HALF_ARG)
-			*(short*)buf_ptr = imma;
+			sljit_unaligned_store_s16(buf_ptr, imma);
 		else if (!(flags & EX86_SHIFT_INS))
-			*(sljit_sw*)buf_ptr = imma;
+			sljit_unaligned_store_sw(buf_ptr, imma);
 	}
 	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
@@ -541,7 +541,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
 		INC_SIZE(5 + 1);
 		*inst++ = PUSH_i32;
-		*(sljit_sw*)inst = srcw;
+		sljit_unaligned_store_sw(inst, srcw);
 		inst += sizeof(sljit_sw);
 	}

View File

@@ -35,7 +35,7 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
 	INC_SIZE(2 + sizeof(sljit_sw));
 	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
 	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
-	*(sljit_sw*)inst = imm;
+	sljit_unaligned_store_sw(inst, imm);
 	return SLJIT_SUCCESS;
 }
@@ -55,7 +55,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
 	if (jump->flags & JUMP_LABEL)
 		jump->flags |= PATCH_MD;
 	else
-		*(sljit_sw*)code_ptr = jump->u.target;
+		sljit_unaligned_store_sw(code_ptr, jump->u.target);
 	code_ptr += sizeof(sljit_sw);
 	*code_ptr++ = REX_B;
@@ -71,13 +71,13 @@ static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s3
 	if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) {
 		*code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32;
-		*(sljit_sw*)code_ptr = delta;
+		sljit_unaligned_store_sw(code_ptr, delta);
 	}
 	else {
 		SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second);
 		*code_ptr++ = REX_W | REX_B;
 		*code_ptr++ = MOV_r_i32 + 1;
-		*(sljit_sw*)code_ptr = addr;
+		sljit_unaligned_store_sw(code_ptr, addr);
 		code_ptr += sizeof(sljit_sw);
 		*code_ptr++ = REX_B;
 		*code_ptr++ = GROUP_FF;
@@ -193,7 +193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		*inst++ = REX_W;
 		*inst++ = MOV_rm_i32;
 		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
-		*(sljit_s32*)inst = local_size;
+		sljit_unaligned_store_s32(inst, local_size);
 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
 		compiler->skip_checks = 1;
@@ -219,7 +219,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		*inst++ = REX_W;
 		*inst++ = GROUP_BINARY_81;
 		*inst++ = MOD_REG | SUB | 4;
-		*(sljit_s32*)inst = local_size;
+		sljit_unaligned_store_s32(inst, local_size);
 		inst += sizeof(sljit_s32);
 	}
@@ -230,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		FAIL_IF(!inst);
 		INC_SIZE(5);
 		*inst++ = GROUP_0F;
-		*(sljit_s32*)inst = 0x20247429;
+		sljit_unaligned_store_s32(inst, 0x20247429);
 	}
 #endif
@@ -271,7 +271,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
 		FAIL_IF(!inst);
 		INC_SIZE(5);
 		*inst++ = GROUP_0F;
-		*(sljit_s32*)inst = 0x20247428;
+		sljit_unaligned_store_s32(inst, 0x20247428);
 	}
 #endif
@@ -292,7 +292,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
 		*inst++ = REX_W;
 		*inst++ = GROUP_BINARY_81;
 		*inst++ = MOD_REG | ADD | 4;
-		*(sljit_s32*)inst = compiler->local_size;
+		sljit_unaligned_store_s32(inst, compiler->local_size);
 	}
 	tmp = compiler->scratches;
@@ -339,7 +339,7 @@ static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sl
 	if (rex)
 		*inst++ = rex;
 	*inst++ = opcode;
-	*(sljit_s32*)inst = imm;
+	sljit_unaligned_store_s32(inst, imm);
 	return SLJIT_SUCCESS;
 }
@@ -516,7 +516,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 			if (immb <= 127 && immb >= -128)
 				*buf_ptr++ = immb; /* 8 bit displacement. */
 			else {
-				*(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */
+				sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
 				buf_ptr += sizeof(sljit_s32);
 			}
 		}
@@ -533,7 +533,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 		else {
 			*buf_ptr++ |= 0x04;
 			*buf_ptr++ = 0x25;
-			*(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */
+			sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
 			buf_ptr += sizeof(sljit_s32);
 		}
@@ -541,9 +541,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32
 		if (flags & EX86_BYTE_ARG)
 			*buf_ptr = imma;
 		else if (flags & EX86_HALF_ARG)
-			*(short*)buf_ptr = imma;
+			sljit_unaligned_store_s16(buf_ptr, imma);
 		else if (!(flags & EX86_SHIFT_INS))
-			*(sljit_s32*)buf_ptr = imma;
+			sljit_unaligned_store_s32(buf_ptr, imma);
 	}
 	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
@@ -676,7 +676,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
 		INC_SIZE(5 + 1);
 		*inst++ = PUSH_i32;
-		*(sljit_s32*)inst = srcw;
+		sljit_unaligned_store_s32(inst, srcw);
 		inst += sizeof(sljit_s32);
 	}

View File

@@ -279,6 +279,34 @@ static sljit_s32 cpu_has_cmov = -1;
 #include <intrin.h>
 #endif
 
+/******************************************************/
+/* Unaligned-store functions                          */
+/******************************************************/
+
+static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+static SLJIT_INLINE void sljit_unaligned_store_uw(void *addr, sljit_uw value)
+{
+	SLJIT_MEMCPY(addr, &value, sizeof(value));
+}
+
+/******************************************************/
+/* Utility functions                                  */
+/******************************************************/
+
 static void get_cpu_features(void)
 {
 	sljit_u32 features;
@@ -504,7 +532,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 				*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
 				buf_ptr++;
-				*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
+				sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
 				code_ptr += sizeof(sljit_sw);
 				buf_ptr += sizeof(sljit_sw) - 1;
 #else
@@ -531,24 +559,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		} else if (jump->flags & PATCH_MW) {
 			if (jump->flags & JUMP_LABEL) {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
+				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
 #else
 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-				*(sljit_s32*)jump->addr = (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)));
+				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
 #endif
 			}
 			else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
+				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
 #else
 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
-				*(sljit_s32*)jump->addr = (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)));
+				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
 #endif
 			}
 		}
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 		else if (jump->flags & PATCH_MD)
-			*(sljit_sw*)jump->addr = jump->u.label->addr;
+			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
 #endif
 
 		jump = jump->next;
@@ -1699,7 +1727,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 			FAIL_IF(!inst);
 			INC_SIZE(4);
-			*(sljit_sw*)inst = src1w;
+			sljit_unaligned_store_sw(inst, src1w);
 		}
 #else
 		else if (IS_HALFWORD(src1w)) {
@@ -1709,7 +1737,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 			FAIL_IF(!inst);
 			INC_SIZE(4);
-			*(sljit_s32*)inst = (sljit_s32)src1w;
+			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
 		}
 		else {
 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
@@ -1742,7 +1770,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 			FAIL_IF(!inst);
 			INC_SIZE(4);
-			*(sljit_sw*)inst = src2w;
+			sljit_unaligned_store_sw(inst, src2w);
 		}
 #else
 		else if (IS_HALFWORD(src2w)) {
@@ -1752,7 +1780,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler,
 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
 			FAIL_IF(!inst);
 			INC_SIZE(4);
-			*(sljit_s32*)inst = (sljit_s32)src2w;
+			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
 		}
 		else {
 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
@@ -2926,15 +2954,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
 {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	*(sljit_sw*)addr = new_addr - (addr + 4);
+	sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
 #else
-	*(sljit_uw*)addr = new_addr;
+	sljit_unaligned_store_uw((void*)addr, new_addr);
 #endif
 }
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
 {
-	*(sljit_sw*)addr = new_constant;
+	sljit_unaligned_store_sw((void*)addr, new_constant);
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)