From 063f45de11a399469169394db57b76dc8acb9648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zolt=C3=A1n=20Herczeg?= Date: Fri, 15 Jul 2016 10:02:19 +0000 Subject: [PATCH] Fix unaligned accesses on x86. Patch by Marc Mutz. --- ChangeLog | 2 ++ src/sljit/sljitConfigInternal.h | 4 +++ src/sljit/sljitNativeX86_32.c | 18 +++++------ src/sljit/sljitNativeX86_64.c | 30 ++++++++--------- src/sljit/sljitNativeX86_common.c | 54 +++++++++++++++++++++++-------- 5 files changed, 71 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8faf636..87c107a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -172,6 +172,8 @@ large stack size when testing with clang. 44. In pcre2test, ignore "allcaptures" after DFA matching. +45. Fix unaligned accesses on x86. Patch by Marc Mutz. + Version 10.21 12-January-2016 ----------------------------- diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index 9275b14..1456bd5 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -214,6 +214,10 @@ #define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) #endif +#ifndef SLJIT_MEMCPY +#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) +#endif + #ifndef SLJIT_ZEROMEM #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) #endif diff --git a/src/sljit/sljitNativeX86_32.c b/src/sljit/sljitNativeX86_32.c index cd3c656..78f3dcb 100644 --- a/src/sljit/sljitNativeX86_32.c +++ b/src/sljit/sljitNativeX86_32.c @@ -34,7 +34,7 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s FAIL_IF(!inst); INC_SIZE(1 + sizeof(sljit_sw)); *inst++ = opcode; - *(sljit_sw*)inst = imm; + sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } @@ -57,7 +57,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MW; else - *(sljit_sw*)code_ptr = jump->u.target - (jump->addr + 4); + sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4)); code_ptr += 4; return code_ptr; @@ -151,12 +151,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP]; inst[2] = GROUP_F7; inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP]; - *(sljit_sw*)(inst + 4) = 0x4; + sljit_unaligned_store_sw(inst + 4, 0x4); inst[8] = JNE_i8; inst[9] = 6; inst[10] = GROUP_BINARY_81; inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP]; - *(sljit_sw*)(inst + 12) = 0x4; + sljit_unaligned_store_sw(inst + 12, 0x4); inst[16] = PUSH_r + reg_map[TMP_REG1]; } else @@ -406,7 +406,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_sw); } } @@ -418,7 +418,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 } else { *buf_ptr++ |= 0x05; - *(sljit_sw*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_sw); } @@ -426,9 +426,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (flags & EX86_BYTE_ARG) *buf_ptr = imma; else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; + sljit_unaligned_store_s16(buf_ptr, imma); else if (!(flags & EX86_SHIFT_INS)) - *(sljit_sw*)buf_ptr = imma; + sljit_unaligned_store_sw(buf_ptr, imma); } return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); @@ -541,7 +541,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler INC_SIZE(5 + 1); *inst++ = PUSH_i32; - *(sljit_sw*)inst = srcw; + sljit_unaligned_store_sw(inst, srcw); inst += sizeof(sljit_sw); } diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index 6ea27e5..e88dded 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -35,7 +35,7 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, INC_SIZE(2 + sizeof(sljit_sw)); *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); *inst++ = MOV_r_i32 + (reg_map[reg] & 0x7); - *(sljit_sw*)inst = imm; + sljit_unaligned_store_sw(inst, imm); return SLJIT_SUCCESS; } @@ -55,7 +55,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MD; else - *(sljit_sw*)code_ptr = jump->u.target; + sljit_unaligned_store_sw(code_ptr, jump->u.target); code_ptr += sizeof(sljit_sw); *code_ptr++ = REX_B; @@ -71,13 +71,13 @@ static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s3 if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) { *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32; - *(sljit_sw*)code_ptr = delta; + sljit_unaligned_store_sw(code_ptr, delta); } else { SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); *code_ptr++ = REX_W | REX_B; *code_ptr++ = MOV_r_i32 + 1; - *(sljit_sw*)code_ptr = addr; + sljit_unaligned_store_sw(code_ptr, addr); code_ptr += sizeof(sljit_sw); *code_ptr++ = REX_B; *code_ptr++ = GROUP_FF; @@ -193,7 +193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi *inst++ = REX_W; *inst++ = MOV_rm_i32; *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; - *(sljit_s32*)inst = local_size; + sljit_unaligned_store_s32(inst, local_size); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; @@ -219,7 +219,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi *inst++ = REX_W; *inst++ = GROUP_BINARY_81; *inst++ = MOD_REG | SUB | 4; - *(sljit_s32*)inst = local_size; + sljit_unaligned_store_s32(inst, local_size); inst += sizeof(sljit_s32); } @@ -230,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi FAIL_IF(!inst); INC_SIZE(5); *inst++ = GROUP_0F; - *(sljit_s32*)inst = 0x20247429; + sljit_unaligned_store_s32(inst, 0x20247429); } #endif @@ -271,7 +271,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp FAIL_IF(!inst); INC_SIZE(5); *inst++ = GROUP_0F; - *(sljit_s32*)inst = 0x20247428; + sljit_unaligned_store_s32(inst, 0x20247428); } #endif @@ -292,7 +292,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp *inst++ = REX_W; *inst++ = GROUP_BINARY_81; *inst++ = MOD_REG | ADD | 4; - *(sljit_s32*)inst = compiler->local_size; + sljit_unaligned_store_s32(inst, compiler->local_size); } tmp = compiler->scratches; @@ -339,7 +339,7 @@ static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sl if (rex) *inst++ = rex; *inst++ = opcode; - *(sljit_s32*)inst = imm; + sljit_unaligned_store_s32(inst, imm); return SLJIT_SUCCESS; } @@ -516,7 +516,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (immb <= 127 && immb >= -128) *buf_ptr++ = immb; /* 8 bit displacement. */ else { - *(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } } @@ -533,7 +533,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 else { *buf_ptr++ |= 0x04; *buf_ptr++ = 0x25; - *(sljit_s32*)buf_ptr = immb; /* 32 bit displacement. */ + sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } @@ -541,9 +541,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (flags & EX86_BYTE_ARG) *buf_ptr = imma; else if (flags & EX86_HALF_ARG) - *(short*)buf_ptr = imma; + sljit_unaligned_store_s16(buf_ptr, imma); else if (!(flags & EX86_SHIFT_INS)) - *(sljit_s32*)buf_ptr = imma; + sljit_unaligned_store_s32(buf_ptr, imma); } return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); @@ -676,7 +676,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler INC_SIZE(5 + 1); *inst++ = PUSH_i32; - *(sljit_s32*)inst = srcw; + sljit_unaligned_store_s32(inst, srcw); inst += sizeof(sljit_s32); } diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index 54c3ac7..0d1de96 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -279,6 +279,34 @@ static sljit_s32 cpu_has_cmov = -1; #include #endif +/******************************************************/ +/* Unaligned-store functions */ +/******************************************************/ + +static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +static SLJIT_INLINE void sljit_unaligned_store_uw(void *addr, sljit_uw value) +{ + SLJIT_MEMCPY(addr, &value, sizeof(value)); +} + +/******************************************************/ +/* Utility functions */ +/******************************************************/ + static void get_cpu_features(void) { sljit_u32 features; @@ -504,7 +532,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; buf_ptr++; - *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)); + sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw))); code_ptr += sizeof(sljit_sw); buf_ptr += sizeof(sljit_sw) - 1; #else @@ -531,24 +559,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)))); #else SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX); - *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)))); #endif } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)))); #else SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX); - *(sljit_s32*)jump->addr = (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)))); #endif } } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) else if (jump->flags & PATCH_MD) - *(sljit_sw*)jump->addr = jump->u.label->addr; + sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr); #endif jump = jump->next; @@ -1699,7 +1727,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_sw*)inst = src1w; + sljit_unaligned_store_sw(inst, src1w); } #else else if (IS_HALFWORD(src1w)) { @@ -1709,7 +1737,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_s32*)inst = (sljit_s32)src1w; + sljit_unaligned_store_s32(inst, (sljit_s32)src1w); } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); @@ -1742,7 +1770,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_sw*)inst = src2w; + sljit_unaligned_store_sw(inst, src2w); } #else else if (IS_HALFWORD(src2w)) { @@ -1752,7 +1780,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); FAIL_IF(!inst); INC_SIZE(4); - *(sljit_s32*)inst = (sljit_s32)src2w; + sljit_unaligned_store_s32(inst, (sljit_s32)src2w); } else { EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); @@ -2926,15 +2954,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *(sljit_sw*)addr = new_addr - (addr + 4); + sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4)); #else - *(sljit_uw*)addr = new_addr; + sljit_unaligned_store_uw((void*)addr, new_addr); #endif } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) { - *(sljit_sw*)addr = new_constant; + sljit_unaligned_store_sw((void*)addr, new_constant); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)