From 26a6b910db9b0cad2712066aa79f8a6f9e22d235 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 13 Nov 2016 16:44:42 +0100 Subject: [PATCH] Applied mainline PCem commit: Added code generation for RMW versions of ADD/SUB/OR/XOR/AND. --- src/codegen.c | 3 - src/codegen.h | 18 +- src/codegen_ops.c | 3 - src/codegen_ops.h | 3 - src/codegen_ops_arith.h | 296 +++++++-- src/codegen_ops_jump.h | 3 - src/codegen_ops_logic.h | 93 ++- src/codegen_ops_shift.h | 3 - src/codegen_ops_x86-64.h | 1122 +++++++++++++++++++++++++++++++--- src/codegen_ops_x86.h | 123 ++++ src/codegen_ops_xchg.h | 11 +- src/codegen_timing_486.c | 3 - src/codegen_timing_686.c | 3 - src/codegen_timing_pentium.c | 3 - src/codegen_timing_winchip.c | 3 - src/codegen_x86-64.h | 5 +- src/codegen_x86.c | 641 ++++++++++++++++++- src/codegen_x86.h | 12 + 18 files changed, 2136 insertions(+), 212 deletions(-) diff --git a/src/codegen.c b/src/codegen.c index 50f510d30..5667c9bad 100644 --- a/src/codegen.c +++ b/src/codegen.c @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #include "ibm.h" #include "x86_ops.h" #include "mem.h" diff --git a/src/codegen.h b/src/codegen.h index 7b635623a..ed5ce3a5f 100644 --- a/src/codegen.h +++ b/src/codegen.h @@ -60,7 +60,7 @@ typedef struct codeblock_t int TOP; uint64_t cmp; - + uint8_t data[2048]; } codeblock_t; @@ -72,7 +72,7 @@ typedef struct codeblock_t static inline codeblock_t *codeblock_tree_find(uint32_t phys, uint32_t _cs) { codeblock_t *block = pages[phys >> 12].head; - uint64_t a = _cs | ((uint64_t)phys << 32); + uint64_t a = _cs | ((uint64_t)phys << 32); while (block) { @@ -90,10 +90,9 @@ static inline codeblock_t *codeblock_tree_find(uint32_t phys, uint32_t _cs) static inline void codeblock_tree_add(codeblock_t *new_block) { codeblock_t *block = pages[new_block->phys >> 12].head; - uint64_t a = new_block->_cs | ((uint64_t)new_block->phys << 32); new_block->cmp = a; - + if (!block) { pages[new_block->phys >> 12].head = new_block; 
@@ -102,11 +101,10 @@ static inline void codeblock_tree_add(codeblock_t *new_block) else { codeblock_t *old_block = NULL; - + while (block) { old_block = block; - if (a < old_block->cmp) block = block->left; else @@ -302,7 +300,7 @@ extern int block_pos; static inline void addbyte(uint8_t val) { codeblock[block_current].data[block_pos++] = val; - if (block_pos >= 1760) + if (block_pos >= BLOCK_MAX) { CPU_BLOCK_END(); } @@ -312,7 +310,7 @@ static inline void addword(uint16_t val) { *(uint16_t *)&codeblock[block_current].data[block_pos] = val; block_pos += 2; - if (block_pos >= 1720) + if (block_pos >= BLOCK_MAX) { CPU_BLOCK_END(); } @@ -322,7 +320,7 @@ static inline void addlong(uint32_t val) { *(uint32_t *)&codeblock[block_current].data[block_pos] = val; block_pos += 4; - if (block_pos >= 1720) + if (block_pos >= BLOCK_MAX) { CPU_BLOCK_END(); } @@ -332,7 +330,7 @@ static inline void addquad(uint64_t val) { *(uint64_t *)&codeblock[block_current].data[block_pos] = val; block_pos += 8; - if (block_pos >= 1720) + if (block_pos >= BLOCK_MAX) { CPU_BLOCK_END(); } diff --git a/src/codegen_ops.c b/src/codegen_ops.c index 179c7398a..083c80b7d 100644 --- a/src/codegen_ops.c +++ b/src/codegen_ops.c @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #include "ibm.h" #include "x86.h" #include "x86_ops.h" diff --git a/src/codegen_ops.h b/src/codegen_ops.h index 4a7474990..36b21918e 100644 --- a/src/codegen_ops.h +++ b/src/codegen_ops.h @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ typedef uint32_t (*RecompOpFn)(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block); extern RecompOpFn recomp_opcodes[512]; diff --git a/src/codegen_ops_arith.h b/src/codegen_ops_arith.h index 75fb7dbe8..b732f9f9f 100644 --- a/src/codegen_ops_arith.h +++ b/src/codegen_ops_arith.h @@ -79,19 +79,38 @@ static uint32_t ropDEC_rl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uin static 
uint32_t rop ## name ## _b_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_B(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uint32_t)&cpu_state.flags_op, FLAGS_ ## op ## 8); \ src_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ STORE_HOST_REG_ADDR_BL((uint32_t)&cpu_state.flags_op1, dst_reg); \ STORE_HOST_REG_ADDR_BL((uint32_t)&cpu_state.flags_op2, src_reg); \ op ## _HOST_REG_B(dst_reg, src_reg); \ STORE_HOST_REG_ADDR_BL((uint32_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_B_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_B_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ codegen_flags_changed = 1; \ @@ -100,19 +119,38 @@ static uint32_t ropDEC_rl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uin static uint32_t rop ## name ## _w_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_W(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_W(target_seg); \ + 
dst_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uint32_t)&cpu_state.flags_op, FLAGS_ ## op ## 16); \ src_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ STORE_HOST_REG_ADDR_WL((uint32_t)&cpu_state.flags_op1, dst_reg); \ STORE_HOST_REG_ADDR_WL((uint32_t)&cpu_state.flags_op2, src_reg); \ op ## _HOST_REG_W(dst_reg, src_reg); \ STORE_HOST_REG_ADDR_WL((uint32_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_W_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_W_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ codegen_flags_changed = 1; \ @@ -121,19 +159,38 @@ static uint32_t ropDEC_rl(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uin static uint32_t rop ## name ## _l_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_L(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_L(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uint32_t)&cpu_state.flags_op, FLAGS_ ## op ## 32); \ src_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ STORE_HOST_REG_ADDR((uint32_t)&cpu_state.flags_op1, dst_reg); \ STORE_HOST_REG_ADDR((uint32_t)&cpu_state.flags_op2, src_reg); \ op ## _HOST_REG_L(dst_reg, src_reg); \ STORE_HOST_REG_ADDR((uint32_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_L_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + 
STORE_REG_L_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ codegen_flags_changed = 1; \ @@ -541,12 +598,33 @@ static uint32_t rop80(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_ { int host_reg; uint32_t imm; + x86seg *target_seg; - if ((fetchdat & 0x30) == 0x10 || (fetchdat & 0xc0) != 0xc0) + if ((fetchdat & 0x30) == 0x10) return 0; - - host_reg = LOAD_REG_B(fetchdat & 7); - imm = (fetchdat >> 8) & 0xff; + + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_B(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE(target_seg); + host_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_B(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } switch (fetchdat & 0x38) { @@ -584,7 +662,17 @@ static uint32_t rop80(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_ STORE_HOST_REG_ADDR_BL((uint32_t)&cpu_state.flags_res, host_reg); if ((fetchdat & 0x38) != 0x38) - STORE_REG_B_RELEASE(host_reg); + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_B_RELEASE(host_reg); + } + } else RELEASE_REG(host_reg); @@ -596,12 +684,33 @@ static uint32_t rop81_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 { int host_reg; uint32_t imm; - - if ((fetchdat & 0x30) == 0x10 || (fetchdat & 0xc0) != 0xc0) + x86seg *target_seg; + + if ((fetchdat & 0x30) == 0x10) return 0; - host_reg = LOAD_REG_W(fetchdat & 7); - imm = (fetchdat >> 8) & 0xffff; + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, 
op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_W(target_seg); + host_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); + } + imm = fastreadw(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_W(fetchdat & 7); + imm = (fetchdat >> 8) & 0xffff; + } switch (fetchdat & 0x38) { @@ -639,7 +748,17 @@ static uint32_t rop81_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 STORE_HOST_REG_ADDR_WL((uint32_t)&cpu_state.flags_res, host_reg); if ((fetchdat & 0x38) != 0x38) - STORE_REG_W_RELEASE(host_reg); + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_W_RELEASE(host_reg); + } + } else RELEASE_REG(host_reg); @@ -650,13 +769,32 @@ static uint32_t rop81_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 { int host_reg; uint32_t imm; + x86seg *target_seg; - if ((fetchdat & 0x30) == 0x10 || (fetchdat & 0xc0) != 0xc0) + if ((fetchdat & 0x30) == 0x10) return 0; + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_L(target_seg); /*dword access: use the _L write check, matching rop83_l and the _l_rmw ops*/ + host_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); + } + } + else + { + host_reg = LOAD_REG_L(fetchdat & 7); + } imm = fastreadl(cs + op_pc + 1); - - host_reg = LOAD_REG_L(fetchdat & 7); switch (fetchdat & 0x38) { @@ -694,7 +832,17 @@ static uint32_t rop81_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 STORE_HOST_REG_ADDR((uint32_t)&cpu_state.flags_res, host_reg); if ((fetchdat & 0x38) != 0x38) - STORE_REG_L_RELEASE(host_reg); + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_L_RELEASE(host_reg); + } + } else 
RELEASE_REG(host_reg); @@ -706,12 +854,34 @@ static uint32_t rop83_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 { int host_reg; uint32_t imm; - - if ((fetchdat & 0x30) == 0x10 || (fetchdat & 0xc0) != 0xc0) + x86seg *target_seg; + + if ((fetchdat & 0x30) == 0x10) return 0; - host_reg = LOAD_REG_W(fetchdat & 7); - imm = (fetchdat >> 8) & 0xff; + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_W(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_W(target_seg); + host_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_W(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + if (imm & 0x80) imm |= 0xff80; @@ -751,7 +921,17 @@ static uint32_t rop83_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 STORE_HOST_REG_ADDR_WL((uint32_t)&cpu_state.flags_res, host_reg); if ((fetchdat & 0x38) != 0x38) - STORE_REG_W_RELEASE(host_reg); + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_W_RELEASE(host_reg); + } + } else RELEASE_REG(host_reg); @@ -762,12 +942,34 @@ static uint32_t rop83_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 { int host_reg; uint32_t imm; + x86seg *target_seg; - if ((fetchdat & 0x30) == 0x10 || (fetchdat & 0xc0) != 0xc0) + if ((fetchdat & 0x30) == 0x10) return 0; - host_reg = LOAD_REG_L(fetchdat & 7); - imm = (fetchdat >> 8) & 0xff; + if ((fetchdat & 0xc0) != 0xc0) + { + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); + if ((fetchdat & 0x38) == 0x38) + { + MEM_LOAD_ADDR_EA_L(target_seg); + host_reg = 0; + } + else + { + SAVE_EA(); + MEM_CHECK_WRITE_L(target_seg); + host_reg = 
MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); + } + imm = fastreadb(cs + op_pc + 1); + } + else + { + host_reg = LOAD_REG_L(fetchdat & 7); + imm = (fetchdat >> 8) & 0xff; + } + if (imm & 0x80) imm |= 0xffffff80; @@ -807,10 +1009,20 @@ static uint32_t rop83_l(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint3 STORE_HOST_REG_ADDR((uint32_t)&cpu_state.flags_res, host_reg); if ((fetchdat & 0x38) != 0x38) - STORE_REG_L_RELEASE(host_reg); + { + if ((fetchdat & 0xc0) != 0xc0) + { + LOAD_EA(); + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, host_reg); + } + else + { + STORE_REG_L_RELEASE(host_reg); + } + } else RELEASE_REG(host_reg); - + codegen_flags_changed = 1; return op_pc + 2; } diff --git a/src/codegen_ops_jump.h b/src/codegen_ops_jump.h index f58880abf..8dba96c7c 100644 --- a/src/codegen_ops_jump.h +++ b/src/codegen_ops_jump.h @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ static uint32_t ropJMP_r8(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) { uint32_t offset = fetchdat & 0xff; diff --git a/src/codegen_ops_logic.h b/src/codegen_ops_logic.h index 1cbbc5f3a..c0ffa641a 100644 --- a/src/codegen_ops_logic.h +++ b/src/codegen_ops_logic.h @@ -2,17 +2,36 @@ static uint32_t rop ## name ## _b_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_B(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_B(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_B_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN8); \ src_reg = LOAD_REG_B((fetchdat >> 3) & 7); \ op ## _HOST_REG_B(dst_reg, 
src_reg); \ STORE_HOST_REG_ADDR_BL((uintptr_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_B_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_B_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_B_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ return op_pc + 1; \ @@ -20,17 +39,36 @@ static uint32_t rop ## name ## _w_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_W(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = LOAD_REG_W(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_W(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_W_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN16); \ src_reg = LOAD_REG_W((fetchdat >> 3) & 7); \ op ## _HOST_REG_W(dst_reg, src_reg); \ STORE_HOST_REG_ADDR_WL((uintptr_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_W_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_W_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_W_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ return op_pc + 1; \ @@ -38,17 +76,36 @@ static uint32_t rop ## name ## _l_rmw(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ int src_reg, dst_reg; \ + x86seg *target_seg; \ \ - if ((fetchdat & 0xc0) != 0xc0) \ - return 0; \ - \ - dst_reg = LOAD_REG_L(fetchdat & 7); \ + if ((fetchdat & 0xc0) == 0xc0) \ + { \ + dst_reg = 
LOAD_REG_L(fetchdat & 7); \ + } \ + else \ + { \ + target_seg = FETCH_EA(op_ea_seg, fetchdat, op_ssegs, &op_pc, op_32); \ + STORE_IMM_ADDR_L((uintptr_t)&cpu_state.oldpc, op_old_pc); \ + SAVE_EA(); \ + MEM_CHECK_WRITE_L(target_seg); \ + dst_reg = MEM_LOAD_ADDR_EA_L_NO_ABRT(target_seg); \ + } \ STORE_IMM_ADDR_L((uintptr_t)&cpu_state.flags_op, FLAGS_ZN32); \ src_reg = LOAD_REG_L((fetchdat >> 3) & 7); \ op ## _HOST_REG_L(dst_reg, src_reg); \ STORE_HOST_REG_ADDR((uintptr_t)&cpu_state.flags_res, dst_reg); \ - if (writeback) STORE_REG_L_RELEASE(dst_reg); \ - else RELEASE_REG(dst_reg); \ + if (writeback) \ + { \ + if ((fetchdat & 0xc0) == 0xc0) \ + STORE_REG_L_RELEASE(dst_reg); \ + else \ + { \ + LOAD_EA(); \ + MEM_STORE_ADDR_EA_L_NO_ABRT(target_seg, dst_reg); \ + } \ + } \ + else \ + RELEASE_REG(dst_reg); \ RELEASE_REG(src_reg); \ \ return op_pc + 1; \ diff --git a/src/codegen_ops_shift.h b/src/codegen_ops_shift.h index 21995f309..cedcf0c44 100644 --- a/src/codegen_ops_shift.h +++ b/src/codegen_ops_shift.h @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #define SHIFT(size, size2, count, res_store) \ STORE_IMM_ADDR_L((uint32_t)&cpu_state.flags_op2, count); \ reg = LOAD_REG_ ## size(fetchdat & 7); \ diff --git a/src/codegen_ops_x86-64.h b/src/codegen_ops_x86-64.h index 5fd9923ba..d1747b9cf 100644 --- a/src/codegen_ops_x86-64.h +++ b/src/codegen_ops_x86-64.h @@ -1668,35 +1668,40 @@ static void MEM_STORE_ADDR_IMM_L(x86seg *seg, uint32_t addr, int host_reg) static void STORE_HOST_REG_ADDR_BL(uintptr_t addr, int host_reg) { + int temp_reg = REG_ECX; + + if (host_reg_mapping[REG_ECX] != -1) + temp_reg = REG_EBX; + if (host_reg & 0x10) { if (host_reg & 8) addbyte(0x41); - addbyte(0x0f); /*MOVZX EBX, host_reg*/ + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ addbyte(0xb7); - addbyte(0xc0 | (REG_ECX << 3) | (host_reg & 7)); - addbyte(0xc1); /*SHR EBX, 8*/ - addbyte(0xe8 | REG_ECX); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); + 
addbyte(0xc1); /*SHR temp_reg, 8*/ + addbyte(0xe8 | temp_reg); addbyte(8); } else { if (host_reg & 8) addbyte(0x41); - addbyte(0x0f); /*MOVZX EBX, host_reg*/ + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ addbyte(0xb6); - addbyte(0xc0 | (REG_ECX << 3) | (host_reg & 7)); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); } if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) { - addbyte(0x89); /*MOV addr, ECX*/ - addbyte(0x45 | (REG_ECX << 3)); + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x45 | (temp_reg << 3)); addbyte((uint32_t)addr - (uint32_t)&cpu_state - 128); } else if (IS_32_ADDR(addr)) { - addbyte(0x89); /*MOV addr, ECX*/ - addbyte(0x04 | (REG_ECX << 3)); + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x04 | (temp_reg << 3)); addbyte(0x25); addlong(addr); } @@ -1705,27 +1710,32 @@ static void STORE_HOST_REG_ADDR_BL(uintptr_t addr, int host_reg) addbyte(0x48); /*MOV RSI, addr*/ addbyte(0xb8 | REG_ESI); addquad((uint64_t)addr); - addbyte(0x89); /*MOV [RSI], ECX*/ - addbyte(0x0e); + addbyte(0x89); /*MOV [RSI], temp_reg*/ + addbyte(0x06 | (temp_reg << 3)); } } static void STORE_HOST_REG_ADDR_WL(uintptr_t addr, int host_reg) { + int temp_reg = REG_ECX; + + if (host_reg_mapping[REG_ECX] != -1) + temp_reg = REG_EBX; + if (host_reg & 8) addbyte(0x41); - addbyte(0x0f); /*MOVZX ECX, host_reg*/ + addbyte(0x0f); /*MOVZX temp_reg, host_reg*/ addbyte(0xb7); - addbyte(0xc0 | (REG_ECX << 3) | (host_reg & 7)); + addbyte(0xc0 | (temp_reg << 3) | (host_reg & 7)); if (addr >= (uintptr_t)&cpu_state && addr < ((uintptr_t)&cpu_state)+0x100) { - addbyte(0x89); /*MOV addr, ECX*/ - addbyte(0x45 | (REG_ECX << 3)); + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x45 | (temp_reg << 3)); addbyte((uint32_t)addr - (uint32_t)&cpu_state - 128); } else if (IS_32_ADDR(addr)) { - addbyte(0x89); /*MOV addr, ECX*/ - addbyte(0x04 | (REG_ECX << 3)); + addbyte(0x89); /*MOV addr, temp_reg*/ + addbyte(0x04 | (temp_reg << 3)); addbyte(0x25); addlong(addr); } @@ -1734,8 
+1744,8 @@ static void STORE_HOST_REG_ADDR_WL(uintptr_t addr, int host_reg) addbyte(0x48); /*MOV RSI, addr*/ addbyte(0xb8 | REG_ESI); addquad((uint64_t)addr); - addbyte(0x89); /*MOV [RSI], ECX*/ - addbyte(0x0e); + addbyte(0x89); /*MOV [RSI], temp_reg*/ + addbyte(0x06 | (temp_reg << 3)); } } static void STORE_HOST_REG_ADDR_W(uintptr_t addr, int host_reg) @@ -1926,69 +1936,6 @@ static void AND_HOST_REG_B(int dst_reg, int src_reg) addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } } -#if 0 - else if (dst_reg & 8) - { - if (dst_reg & 0x10) - { - if (src_reg & 0x10) - { - addbyte(0x66); /*SHL src_reg, 8*/ - addbyte(0xc1); - addbyte(0xe0 | src_reg); - addbyte(0x08); - addbyte(0x66); /*OR src_reg, 0xff*/ - addbyte(0x81); - addbyte(0xc8 | src_reg); - addword(0xff); - } - addbyte(0x66); /*ANDW dst_reg, src_reg*/ - addbyte(0x41); - addbyte(0x21); - addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); - } - else - { - addbyte(0x41); /*ANDB dst_reg, src_reg*/ - addbyte(0x20); - addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); - } - } - else if (src_reg & 8) - { - if (src_reg & 0x10) - { - addbyte(0x66); /*OR src_reg, 0xff*/ - addbyte(0x81); - addbyte(0xc8 | src_reg); - addword(0xff); - } - if (src_reg & 0x10) - { - addbyte(0x66); /*SHL src_reg, 8*/ - addbyte(0xc1); - addbyte(0xe0 | src_reg); - addbyte(0x08); - addbyte(0x66); /*OR src_reg, 0xff*/ - addbyte(0x81); - addbyte(0xc8 | src_reg); - addword(0xff); - } - addbyte(0x66); /*ANDW dst_reg, src_reg*/ - addbyte(0x41); - addbyte(0x21); - addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); - } - else - { - addbyte(0x41); /*ANDB dst_reg, src_reg*/ - addbyte(0x20); - addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); - } - } - else - fatal("!(dst_reg & src_reg & 8) b %i %i\n", dst_reg, src_reg); -#endif } static void AND_HOST_REG_W(int dst_reg, int src_reg) { @@ -2207,8 +2154,58 @@ static void OR_HOST_REG_B(int dst_reg, int src_reg) addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } } + else if (src_reg & 8) + 
{ + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x08); /*ORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*ORB dst_reg, src_reg*/ + addbyte(0x08); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x08); /*ORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x08); /*ORB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } } static void OR_HOST_REG_W(int dst_reg, int src_reg) { @@ -2226,8 +2223,19 @@ static void OR_HOST_REG_W(int dst_reg, int src_reg) addbyte(0x09); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + addbyte(0x66); /*ORW dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } } static void OR_HOST_REG_L(int dst_reg, int src_reg) { @@ -2243,8 +2251,17 @@ static void OR_HOST_REG_L(int dst_reg, int src_reg) addbyte(0x09); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x44); /*ORL dst_reg, src_reg*/ + addbyte(0x09); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + addbyte(0x09); 
/*ORL dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } } static void OR_HOST_REG_IMM(int host_reg, uint32_t imm) { @@ -2264,7 +2281,12 @@ static void OR_HOST_REG_IMM(int host_reg, uint32_t imm) addlong(imm); } else - fatal("OR to bad register\n"); + { + addbyte(0x81); /*ORL host_reg, imm*/ + addbyte(0xc8 | (host_reg & 7)); + addlong(imm); + } +// fatal("OR to bad register\n"); } static void XOR_HOST_REG_B(int dst_reg, int src_reg) @@ -2336,8 +2358,58 @@ static void XOR_HOST_REG_B(int dst_reg, int src_reg) addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } } + else if (src_reg & 8) + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x41); /*MOVZX EBX, src_reg*/ + addbyte(0x0f); + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x30); /*XORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*XORB dst_reg, src_reg*/ + addbyte(0x30); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + if (dst_reg & 0x10) + { + addbyte(0xc1); /*SHR dst_reg, 8*/ + addbyte(0xe8 | (dst_reg & 7)); + addbyte(8); + } + if (src_reg & 0x10) + { + addbyte(0x0f); /*MOVZX EBX, src_reg*/ + addbyte(0xb7); + addbyte(0xd8 | (src_reg & 7)); + addbyte(0xc1); /*SHR EBX, 8*/ + addbyte(0xeb); + addbyte(8); + addbyte(0x30); /*XORB dst_reg, EBX*/ + addbyte(0xd8 | (dst_reg & 7)); + } + else + { + addbyte(0x30); /*XORB dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } } static void XOR_HOST_REG_W(int dst_reg, int src_reg) { @@ -2355,8 +2427,19 @@ static void XOR_HOST_REG_W(int dst_reg, int src_reg) addbyte(0x31); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x31); + 
addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + addbyte(0x66); /*XORW dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } } static void XOR_HOST_REG_L(int dst_reg, int src_reg) { @@ -2372,8 +2455,17 @@ static void XOR_HOST_REG_L(int dst_reg, int src_reg) addbyte(0x31); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x44); /*XORL dst_reg, src_reg*/ + addbyte(0x31); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else - fatal("!(dst_reg & src_reg & 8)\n"); + { + addbyte(0x31); /*XORL dst_reg, src_reg*/ + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } } static void XOR_HOST_REG_IMM(int host_reg, uint32_t imm) { @@ -2393,7 +2485,12 @@ static void XOR_HOST_REG_IMM(int host_reg, uint32_t imm) addlong(imm); } else - fatal("XOR to bad register\n"); + { + addbyte(0x81); /*XORL host_reg, imm*/ + addbyte(0xf0 | (host_reg & 7)); + addlong(imm); + } +// fatal("XOR to bad register\n"); } static void ADD_HOST_REG_B(int dst_reg, int src_reg) @@ -2465,6 +2562,28 @@ static void ADD_HOST_REG_B(int dst_reg, int src_reg) addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } } + else if (src_reg & 8) + { + if (src_reg & 0x10) + { + addbyte(0x66); /*MOVW AX, src_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0xc0 | ((src_reg & 7) << 3)); + addbyte(0x66); /*SHR AX, 8*/ + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + addbyte(0x00); /*ADDB dst_reg, AL*/ + addbyte(0xc0 | (dst_reg & 7)); + } + else + { + addbyte(0x44); /*ADDB dst_reg, src_reg*/ + addbyte(0x00); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } + } else fatal("!(dst_reg & src_reg & 8)\n"); } @@ -2484,6 +2603,13 @@ static void ADD_HOST_REG_W(int dst_reg, int src_reg) addbyte(0x01); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x66); /*ADDW dst_reg, src_reg*/ + addbyte(0x44); + addbyte(0x01); + 
addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else fatal("!(dst_reg & src_reg & 8)\n"); } @@ -2501,6 +2627,12 @@ static void ADD_HOST_REG_L(int dst_reg, int src_reg) addbyte(0x01); addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); } + else if (src_reg & 8) + { + addbyte(0x44); /*ADDL dst_reg, src_reg*/ + addbyte(0x01); + addbyte(0xc0 | (dst_reg & 7) | ((src_reg & 7) << 3)); + } else fatal("!(dst_reg & src_reg & 8)\n"); } @@ -2737,15 +2869,17 @@ static void ADD_HOST_REG_IMM_B(int host_reg, uint8_t imm) if (host_reg & 0x10) { addbyte(0x66); /*ADDW host_reg, imm*/ - addbyte(0x41); + if (host_reg & 8) + addbyte(0x41); addbyte(0x81); addbyte(0xC0 | (host_reg & 7)); addword(imm << 8); } else { - addbyte(0x41); /*ADDB host_reg, imm*/ - addbyte(0x80); + if (host_reg & 8) + addbyte(0x41); + addbyte(0x80); /*ADDB host_reg, imm*/ addbyte(0xC0 | (host_reg & 7)); addbyte(imm); } @@ -2753,15 +2887,17 @@ static void ADD_HOST_REG_IMM_B(int host_reg, uint8_t imm) static void ADD_HOST_REG_IMM_W(int host_reg, uint16_t imm) { addbyte(0x66); /*ADDW host_reg, imm*/ - addbyte(0x41); + if (host_reg & 8) + addbyte(0x41); addbyte(0x81); addbyte(0xC0 | (host_reg & 7)); addword(imm); } static void ADD_HOST_REG_IMM(int host_reg, uint32_t imm) { - addbyte(0x41); /*ADDL host_reg, imm*/ - addbyte(0x81); + if (host_reg & 8) + addbyte(0x41); + addbyte(0x81); /*ADDL host_reg, imm*/ addbyte(0xC0 | (host_reg & 7)); addlong(imm); } @@ -3637,6 +3773,7 @@ static void FP_FXCH(int reg) addbyte(0x4c); addbyte(0xdd); addbyte(cpu_state_offset(MM)); + reg = reg; } @@ -4993,3 +5130,794 @@ static void MMX_PSLLQ_imm(int dst_reg, int amount) addbyte(0xc0 | dst_reg | 0x30); addbyte(amount); } + + +static void SAVE_EA() +{ + addbyte(0x89); /*MOV [ESP+0x24], EAX*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x24); +} +static void LOAD_EA() +{ + addbyte(0x8b); /*MOV EAX, [ESP+0x24]*/ + addbyte(0x44); + addbyte(0x24); + addbyte(0x24); +} + +static void MEM_CHECK_WRITE(x86seg *seg) +{ + uint8_t *jump1, 
*jump2; + + CHECK_SEG_WRITE(seg); + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); +// addbyte(0xc3); /*RET*/ + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(10); + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x75); /*JNE +*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); +// addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + + + load_param_1_reg_32(REG_EDI); + load_param_2_32(&codeblock[block_current], 1); + + +// addbyte(0x6a); /*PUSH 1*/ +// addbyte(1); +// addbyte(0x57); /*PUSH EDI*/ + call(&codeblock[block_current], mmutranslatereal); +// addbyte(0x48); /*ADD ESP, 8*/ +// addbyte(0x83); +// addbyte(0xc4); +// addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); +// addbyte(0xc3); /*RET*/ + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump2 = 
(uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + + + +// addbyte(0xe8); /*CALL mem_store_addr_ea_b*/ +// addlong(mem_check_write - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} + +static void MEM_CHECK_WRITE_W(x86seg *seg) +{ + uint8_t *jump1, *jump2, *jump3, *jump4; + int jump_pos; + + CHECK_SEG_WRITE(seg); + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 1[EDI]*/ + addbyte(0x77); + addbyte(0x01); + addbyte(0x74); /*JE slowpath*/ + jump4 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x89); /*MOV EBX, EDI*/ + addbyte(0xfb); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ + addbyte(0x3c); + addbyte(0xf5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x75); /*JNE +*/ + jump3 = &codeblock[block_current].data[block_pos]; + addbyte(0); + + /*slowpath:*/ + *jump2 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + *jump4 = 
(uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump4 - 1; + jump_pos = block_pos; + load_param_1_reg_32(REG_EBX); + load_param_2_32(&codeblock[block_current], 1); + call(&codeblock[block_current], mmutranslatereal); + addbyte(0x83); /*ADD EBX, 1*/ + addbyte(0xc3); + addbyte(1); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $fff, EBX*/ + addbyte(0xc3); + addlong(0xfff); + addbyte(0x74); /*JNE slowpath*/ + addbyte(jump_pos - block_pos - 1); + + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump3 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump3 - 1; + + LOAD_EA(); +} + +static void MEM_CHECK_WRITE_L(x86seg *seg) +{ + uint8_t *jump1, *jump2, *jump3, *jump4; + int jump_pos; + + CHECK_SEG_WRITE(seg); + + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOV ESI, seg->base*/ + addbyte(0x34); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &addr*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ESI, [RSI]*/ + addbyte(0x36); + } + + + /*seg = ESI, addr = EAX*/ + + addbyte(0x67); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x8d); + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3c); + addbyte(0x25); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x79); /*JNS +*/ + jump1 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 3[EDI]*/ + addbyte(0x77); + addbyte(0x03); + addbyte(0x74); /*JE slowpath*/ + jump4 = 
&codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x89); /*MOV EBX, EDI*/ + addbyte(0xfb); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[RDI*8],-1*/ + addbyte(0x3c); + addbyte(0xfd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + jump2 = &codeblock[block_current].data[block_pos]; + addbyte(0); + addbyte(0x83); /*CMP writelookup2[RSI*8],-1*/ + addbyte(0x3c); + addbyte(0xf5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x75); /*JNE +*/ + jump3 = &codeblock[block_current].data[block_pos]; + addbyte(0); + + /*slowpath:*/ + *jump2 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump2 - 1; + *jump4 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump4 - 1; + jump_pos = block_pos; + load_param_1_reg_32(REG_EBX); + load_param_2_32(&codeblock[block_current], 1); + call(&codeblock[block_current], mmutranslatereal); + addbyte(0x83); /*ADD EBX, 3*/ + addbyte(0xc3); + addbyte(3); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong((uintptr_t)&codeblock[block_current].data[BLOCK_EXIT_OFFSET] - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $ffc, EBX*/ + addbyte(0xc3); + addlong(0xffc); + addbyte(0x74); /*JE slowpath*/ + addbyte(jump_pos - block_pos - 1); + + *jump1 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump1 - 1; + *jump3 = (uintptr_t)&codeblock[block_current].data[block_pos] - (uintptr_t)jump3 - 1; + + LOAD_EA(); +} + +static int MEM_LOAD_ADDR_EA_B_NO_ABRT(x86seg *seg) +{ + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + 
addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2); + addbyte(0x8b); /*MOV AL,[RDI+RSI]*/ + addbyte(0x04); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long(readmemb386l); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static int MEM_LOAD_ADDR_EA_W_NO_ABRT(x86seg *seg) +{ + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x67); /*LEA EDI, 1[ESI]*/ + addbyte(0x8d); + addbyte(0x7e); + addbyte(0x01); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xfff*/ + addbyte(0xc7); + 
addlong(0xfff); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+5+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(5+2); + addbyte(0x66); /*MOV AX,-1[RDI+RSI]*/ + addbyte(0x8b); + addbyte(0x44); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-1); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long(readmemwl); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} +static int MEM_LOAD_ADDR_EA_L_NO_ABRT(x86seg *seg) +{ + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL ECX, seg->base*/ + addbyte(0x0c); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV ECX, [RSI]*/ + addbyte(0x0e); + } + addbyte(0x67); /*LEA ESI, (EAX,ECX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x08); + addbyte(0x67); /*LEA EDI, 3[ESI]*/ + addbyte(0x8d); + addbyte(0x7e); + addbyte(0x03); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xffc*/ + addbyte(0xc7); + addlong(0xffc); + if (IS_32_ADDR(readlookup2)) + { + addbyte(0x67); /*MOV RSI, readlookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)readlookup2); + } + else + { + addbyte(0x48); /*MOV RDX, readlookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)readlookup2); + 
addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+4+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+2); + addbyte(0x8b); /*MOV EAX,-3[RDI+RSI]*/ + addbyte(0x44); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-3); + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+12); + /*slowpath:*/ + load_param_1_reg_32(REG_ECX); + load_param_2_reg_32(REG_EAX); + call_long(readmemll); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + /*done:*/ + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} + +static void MEM_STORE_ADDR_EA_B_NO_ABRT(x86seg *seg, int host_reg) +{ + if (host_reg & 0x10) + { + /*Handle high byte of register*/ + if (host_reg & 8) + { + addbyte(0x45); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + else + { + addbyte(0x41); /*MOVL R8, host_reg*/ + addbyte(0x89); + addbyte(0xc0 | ((host_reg & 7) << 3)); + } + addbyte(0x66); /*SHR R8, 8*/ + addbyte(0x41); + addbyte(0xc1); + addbyte(0xe8); + addbyte(8); + host_reg = 8; + } + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xf7); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + 
addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 4:3)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x88); + addbyte(0x04 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + else + { + addbyte(0x88); /*MOV [RDI+RSI],host_reg*/ + addbyte(0x04 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long(writememb386l); + /*done:*/ +} +static void MEM_STORE_ADDR_EA_W_NO_ABRT(x86seg *seg, int host_reg) +{ + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x67); /*LEA EDI, 1[ESI]*/ + addbyte(0x8d); + addbyte(0x7e); + addbyte(0x01); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xfff*/ + addbyte(0xc7); + addlong(0xfff); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 
6:5)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 6:5)+2); + if (host_reg & 8) + { + addbyte(0x66); /*MOV -1[RDI+RSI],host_reg*/ + addbyte(0x44); + addbyte(0x89); + addbyte(0x44 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-1); + } + else + { + addbyte(0x66); /*MOV -1[RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x44 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-1); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long(writememwl); + /*done:*/ +} +static void MEM_STORE_ADDR_EA_L_NO_ABRT(x86seg *seg, int host_reg) +{ + if (IS_32_ADDR(&seg->base)) + { + addbyte(0x8b); /*MOVL EBX, seg->base*/ + addbyte(0x1c); + addbyte(0x25); + addlong((uint32_t)&seg->base); + } + else + { + addbyte(0x48); /*MOV RSI, &seg->base*/ + addbyte(0xb8 | REG_ESI); + addquad((uint64_t)&seg->base); + addbyte(0x8b); /*MOV EBX, [RSI]*/ + addbyte(0x1e); + } + addbyte(0x67); /*LEA ESI, (EAX,EBX)*/ + addbyte(0x8d); + addbyte(0x34); + addbyte(0x18); + addbyte(0x67); /*LEA EDI, 3[ESI]*/ + addbyte(0x8d); + addbyte(0x7e); + addbyte(0x03); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xffc*/ + addbyte(0xc7); + addlong(0xffc); + if (IS_32_ADDR(writelookup2)) + { + addbyte(0x67); /*MOV RSI, writelookup2[ESI*8]*/ + addbyte(0x48); + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf5); + addlong((uint32_t)writelookup2); + } + else + { + addbyte(0x48); /*MOV RDX, writelookup2*/ + addbyte(0xb8 | REG_EDX); + addquad((uint64_t)writelookup2); + addbyte(0x48); /*MOV RSI, [RDX+RSI*8]*/ + addbyte(0x8b); + addbyte(0x34); + addbyte(0xf2); + } + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+((host_reg & 8) ? 
5:4)+2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(((host_reg & 8) ? 5:4)+2); + if (host_reg & 8) + { + addbyte(0x44); /*MOV -3[RDI+RSI],host_reg*/ + addbyte(0x89); + addbyte(0x44 | ((host_reg & 7) << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-3); + } + else + { + addbyte(0x89); /*MOV -3[RDI+RSI],host_reg*/ + addbyte(0x44 | (host_reg << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-3); + } + addbyte(0xeb); /*JMP done*/ + addbyte(2+2+3+12); + /*slowpath:*/ + load_param_3_reg_32(host_reg); + load_param_1_reg_32(REG_EBX); + load_param_2_reg_32(REG_EAX); + call_long(writememll); + /*done:*/ +} diff --git a/src/codegen_ops_x86.h b/src/codegen_ops_x86.h index 09e1b8896..2858ae59b 100644 --- a/src/codegen_ops_x86.h +++ b/src/codegen_ops_x86.h @@ -671,6 +671,18 @@ static void MEM_LOAD_ADDR_EA_B(x86seg *seg) host_reg_mapping[0] = 8; } +static int MEM_LOAD_ADDR_EA_B_NO_ABRT(x86seg *seg) +{ + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_load_addr_ea_b_no_abrt*/ + addlong(mem_load_addr_ea_b_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} static void MEM_LOAD_ADDR_EA_W(x86seg *seg) { addbyte(0x8b); /*MOVL EDX, seg->base*/ @@ -681,6 +693,18 @@ static void MEM_LOAD_ADDR_EA_W(x86seg *seg) host_reg_mapping[0] = 8; } +static int MEM_LOAD_ADDR_EA_W_NO_ABRT(x86seg *seg) +{ + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_load_addr_ea_w_no_abrt*/ + addlong(mem_load_addr_ea_w_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} static void MEM_LOAD_ADDR_EA_L(x86seg *seg) { addbyte(0x8b); /*MOVL EDX, seg->base*/ @@ -692,6 +716,18 @@ static void MEM_LOAD_ADDR_EA_L(x86seg 
*seg) host_reg_mapping[0] = 8; } +static int MEM_LOAD_ADDR_EA_L_NO_ABRT(x86seg *seg) +{ + addbyte(0x8b); /*MOVL EDX, seg->base*/ + addbyte(0x05 | (REG_EDX << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_load_addr_ea_l_no_abrt*/ + addlong(mem_load_addr_ea_l_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + + host_reg_mapping[REG_ECX] = 8; + + return REG_ECX; +} static void MEM_LOAD_ADDR_EA_Q(x86seg *seg) { @@ -736,6 +772,19 @@ static void MEM_STORE_ADDR_EA_B(x86seg *seg, int host_reg) addbyte(0xe8); /*CALL mem_store_addr_ea_b*/ addlong(mem_store_addr_ea_b - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); } +static void MEM_STORE_ADDR_EA_B_NO_ABRT(x86seg *seg, int host_reg) +{ + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_b_no_abrt*/ + addlong(mem_store_addr_ea_b_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} static void MEM_STORE_ADDR_EA_W(x86seg *seg, int host_reg) { addbyte(0x8b); /*MOVL ESI, seg->base*/ @@ -749,6 +798,19 @@ static void MEM_STORE_ADDR_EA_W(x86seg *seg, int host_reg) addbyte(0xe8); /*CALL mem_store_addr_ea_w*/ addlong(mem_store_addr_ea_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); } +static void MEM_STORE_ADDR_EA_W_NO_ABRT(x86seg *seg, int host_reg) +{ + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_w_no_abrt*/ + addlong(mem_store_addr_ea_w_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} static void MEM_STORE_ADDR_EA_L(x86seg *seg, int host_reg) { addbyte(0x8b); /*MOVL ESI, seg->base*/ @@ 
-762,6 +824,19 @@ static void MEM_STORE_ADDR_EA_L(x86seg *seg, int host_reg) addbyte(0xe8); /*CALL mem_store_addr_ea_l*/ addlong(mem_store_addr_ea_l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); } +static void MEM_STORE_ADDR_EA_L_NO_ABRT(x86seg *seg, int host_reg) +{ + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + if (host_reg != REG_ECX) + { + addbyte(0x89); /*MOV ECX, host_reg*/ + addbyte(0xc0 | REG_ECX | (host_reg << 3)); + } + addbyte(0xe8); /*CALL mem_store_addr_ea_l_no_abrt*/ + addlong(mem_store_addr_ea_l_no_abrt - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); +} static void MEM_STORE_ADDR_EA_Q(x86seg *seg, int host_reg, int host_reg2) { if (host_reg != REG_EBX) @@ -1802,6 +1877,7 @@ static void FP_LOAD_S() addbyte(0xdd); /*FSTP ST[reg][EBP]*/ addbyte(0x5d); addbyte(cpu_state_offset(ST[(cpu_state.TOP - 1) & 7])); + block_current = block_current; } else { @@ -3540,3 +3616,50 @@ static void MMX_PSLLQ_imm(int dst_reg, int amount) addbyte(0xc0 | dst_reg | 0x30); addbyte(amount); } + + +static void SAVE_EA() +{ + addbyte(0x89); /*MOV [ESP+12], EAX*/ + addbyte(0x44); + addbyte(0x24); + addbyte(12); +} +static void LOAD_EA() +{ + addbyte(0x8b); /*MOV EAX, [ESP+12]*/ + addbyte(0x44); + addbyte(0x24); + addbyte(12); +} + +static void MEM_CHECK_WRITE(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_check_write*/ + addlong(mem_check_write - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} +static void MEM_CHECK_WRITE_W(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_check_write_w*/ + addlong(mem_check_write_w - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} +static void 
MEM_CHECK_WRITE_L(x86seg *seg) +{ + CHECK_SEG_WRITE(seg); + addbyte(0x8b); /*MOVL ESI, seg->base*/ + addbyte(0x05 | (REG_ESI << 3)); + addlong((uint32_t)&seg->base); + addbyte(0xe8); /*CALL mem_check_write_l*/ + addlong(mem_check_write_l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + LOAD_EA(); +} diff --git a/src/codegen_ops_xchg.h b/src/codegen_ops_xchg.h index 6d79bccca..0e937d187 100644 --- a/src/codegen_ops_xchg.h +++ b/src/codegen_ops_xchg.h @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #define OP_XCHG_AX_(reg) \ static uint32_t ropXCHG_AX_ ## reg(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) \ { \ @@ -47,9 +44,9 @@ OP_XCHG_EAX_(EBP) static uint32_t ropXCHG_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) { -// #ifdef __amd64__ -// return 0; -// #else +#ifdef __amd64__ + return 0; +#else int src_reg, dst_reg, temp_reg; if ((fetchdat & 0xc0) != 0xc0) @@ -62,7 +59,7 @@ static uint32_t ropXCHG_b(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uin STORE_REG_TARGET_B_RELEASE(temp_reg, fetchdat & 7); return op_pc + 1; -// #endif +#endif } static uint32_t ropXCHG_w(uint8_t opcode, uint32_t fetchdat, uint32_t op_32, uint32_t op_pc, codeblock_t *block) { diff --git a/src/codegen_timing_486.c b/src/codegen_timing_486.c index 83b07bc2e..0cd8c25b7 100644 --- a/src/codegen_timing_486.c +++ b/src/codegen_timing_486.c @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #include "ibm.h" #include "cpu.h" #include "x86.h" diff --git a/src/codegen_timing_686.c b/src/codegen_timing_686.c index 9f93a5652..5b2b5eda5 100644 --- a/src/codegen_timing_686.c +++ b/src/codegen_timing_686.c @@ -1,6 +1,3 @@ -/* Copyright holders: leilei - see COPYING for more details -*/ /*Elements taken into account : - X/Y pairing - FPU/FXCH pairing diff --git a/src/codegen_timing_pentium.c b/src/codegen_timing_pentium.c index 
4bb5b1d2e..31d4cafe7 100644 --- a/src/codegen_timing_pentium.c +++ b/src/codegen_timing_pentium.c @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ /*Elements taken into account : - U/V integer pairing - FPU/FXCH pairing diff --git a/src/codegen_timing_winchip.c b/src/codegen_timing_winchip.c index 6fdf2ba65..342fbb0b6 100644 --- a/src/codegen_timing_winchip.c +++ b/src/codegen_timing_winchip.c @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #include "ibm.h" #include "cpu.h" #include "x86.h" diff --git a/src/codegen_x86-64.h b/src/codegen_x86-64.h index a52a913ef..e8cd97015 100644 --- a/src/codegen_x86-64.h +++ b/src/codegen_x86-64.h @@ -1,6 +1,3 @@ -/* Copyright holders: Sarah Walker - see COPYING for more details -*/ #define BLOCK_SIZE 0x4000 #define BLOCK_MASK 0x3fff #define BLOCK_START 0 @@ -13,6 +10,8 @@ #define BLOCK_EXIT_OFFSET 0x7e0 #define BLOCK_GPF_OFFSET (BLOCK_EXIT_OFFSET - 20) +#define BLOCK_MAX 1650 + enum { OP_RET = 0xc3 diff --git a/src/codegen_x86.c b/src/codegen_x86.c index 7d2a7a9f9..46f2d79e7 100644 --- a/src/codegen_x86.c +++ b/src/codegen_x86.c @@ -68,6 +68,15 @@ uint32_t mem_store_addr_ea_b; uint32_t mem_store_addr_ea_w; uint32_t mem_store_addr_ea_l; uint32_t mem_store_addr_ea_q; +uint32_t mem_load_addr_ea_b_no_abrt; +uint32_t mem_store_addr_ea_b_no_abrt; +uint32_t mem_load_addr_ea_w_no_abrt; +uint32_t mem_store_addr_ea_w_no_abrt; +uint32_t mem_load_addr_ea_l_no_abrt; +uint32_t mem_store_addr_ea_l_no_abrt; +uint32_t mem_check_write; +uint32_t mem_check_write_w; +uint32_t mem_check_write_l; static uint32_t gen_MEM_LOAD_ADDR_EA_B() { @@ -505,6 +514,604 @@ static uint32_t gen_MEM_STORE_ADDR_EA_Q() return addr; } +static char gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_B_NO_ABRT aborted\n"; +static uint32_t gen_MEM_LOAD_ADDR_EA_B_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); 
+ addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x89); /*MOV EDI, EDX*/ + addbyte(0xd7); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x0f); /*MOVZX ECX, B[EDX+EDI]*/ + addbyte(0xb6); + addbyte(0x0c); + addbyte(0x3a); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmembl*/ + addlong((uint32_t)readmemb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*MOVZX ECX, AL*/ + addbyte(0xb6); + addbyte(0xc8); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_LOAD_ADDR_EA_B_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static char gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_W_NO_ABRT aborted\n"; +static uint32_t gen_MEM_LOAD_ADDR_EA_W_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x8d); /*LEA EDI, 1[EDX]*/ + addbyte(0x7a); + addbyte(0x01); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xfff*/ + addbyte(0xc7); + addlong(0xfff); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + 
addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+5+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(5+1); + addbyte(0x0f); /*MOVZX EEX, -1[EDX+EDI]W*/ + addbyte(0xb7); + addbyte(0x4c); + addbyte(0x3a); + addbyte(-1); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemwl*/ + addlong((uint32_t)readmemwl - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*MOVZX ECX, AX*/ + addbyte(0xb7); + addbyte(0xc8); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_LOAD_ADDR_EA_W_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static char gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err[] = "gen_MEM_LOAD_ADDR_EA_L_NO_ABRT aborted\n"; +static uint32_t gen_MEM_LOAD_ADDR_EA_L_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + addbyte(0x89); /*MOV ESI, EDX*/ + addbyte(0xd6); + addbyte(0x01); /*ADDL EDX, EAX*/ + addbyte(0xc2); + addbyte(0x8d); /*LEA EDI, 3[EDX]*/ + addbyte(0x7a); + addbyte(0x03); + addbyte(0xc1); /*SHR EDX, 12*/ + addbyte(0xea); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xffc*/ + addbyte(0xc7); + addlong(0xffc); + addbyte(0x8b); /*MOV EDX, readlookup2[EDX*4]*/ + addbyte(0x14); + addbyte(0x95); + addlong((uint32_t)readlookup2); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP EDX, -1*/ + addbyte(0xfa); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x8b); /*MOV ECX, -3[EDX+EDI]*/ + addbyte(0x4c); + 
addbyte(0x3a); + addbyte(-3); + addbyte(0xc3); /*RET*/ + + addbyte(0x50); /*slowpath: PUSH EAX*/ + addbyte(0x56); /*PUSH ESI*/ + addbyte(0xe8); /*CALL readmemll*/ + addlong((uint32_t)readmemll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x89); /*MOV ECX, EAX*/ + addbyte(0xc1); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0x83); /*SUBL 4,%esp*/ + addbyte(0xEC); + addbyte(4); + addbyte(0xc7); /*MOV [ESP], gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_LOAD_ADDR_EA_L_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static char gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_B_NO_ABRT aborted\n"; +static uint32_t gen_MEM_STORE_ADDR_EA_B_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x89); /*MOV EDI, ESI*/ + addbyte(0xc0 | (REG_ESI << 3) | REG_EDI); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+1); + addbyte(0x88); /*MOV [EDI+ESI],CL*/ + addbyte(0x04 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL 
writememb386l*/ + addlong((uint32_t)writememb386l - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE mem_abrt_rout*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_STORE_ADDR_EA_B_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static char gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_W_NO_ABRT aborted\n"; +static uint32_t gen_MEM_STORE_ADDR_EA_W_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + /*dat = ECX, seg = ESI, addr = EAX*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x8d); /*LEA EDI, 1[ESI]*/ + addbyte(0x7e); + addbyte(0x01); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xfff*/ + addbyte(0xc7); + addlong(0xfff); + addbyte(0x8b); /*MOV ESI, readlookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x74); /*JE slowpath*/ + addbyte(3+2+5+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(5+1); + addbyte(0x66); /*MOV -1[EDI+ESI],CX*/ + addbyte(0x89); + addbyte(0x44 | (REG_CX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-1); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememwl*/ + addlong((uint32_t)writememwl - (uint32_t)(&codeblock[block_current].data[block_pos + 
4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE +1: skip the RET and run the fatal() call below*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_STORE_ADDR_EA_W_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static char gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err[] = "gen_MEM_STORE_ADDR_EA_L_NO_ABRT aborted\n"; +static uint32_t gen_MEM_STORE_ADDR_EA_L_NO_ABRT() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + /*Emits a dword store routine at the current block position and returns its start address. Routine inputs: dat = ECX, seg = ESI, addr = EAX. Fast path writes ECX through the writelookup2 entry for (seg+addr)>>12; a dword that may span two pages or an entry of -1 takes the slowpath call to writememll, after which a non-zero abrt ends in fatal()*/ + addbyte(0x89); /*MOV EBX, ESI*/ + addbyte(0xf3); + addbyte(0x01); /*ADDL ESI, EAX*/ + addbyte(0xc0 | (REG_EAX << 3) | REG_ESI); + addbyte(0x8d); /*LEA EDI, 3[ESI]*/ + addbyte(0x7e); + addbyte(0x03); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xe8 | REG_ESI); + addbyte(12); + addbyte(0xf7); /*TEST EDI, 0xffc*/ + addbyte(0xc7); + addlong(0xffc); + addbyte(0x8b); /*MOV ESI, writelookup2[ESI*4]*/ + addbyte(0x04 | (REG_ESI << 3)); + addbyte(0x85 | (REG_ESI << 3)); + addlong((uint32_t)writelookup2); + addbyte(0x74); /*JE slowpath (ZF still from the TEST: bits 2-11 of addr+3 are zero, so the dword may span two pages)*/ + addbyte(3+2+4+1); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xf8 | REG_ESI); + addbyte(-1); + addbyte(0x74); /*JE slowpath*/ + addbyte(4+1); + addbyte(0x89); /*MOV -3[EDI+ESI],ECX*/ + addbyte(0x44 | (REG_ECX << 3)); + addbyte(REG_EDI | (REG_ESI << 3)); + addbyte(-3); + addbyte(0xc3); /*RET*/ + + addbyte(0x51); /*slowpath: PUSH ECX*/ + addbyte(0x50); /*PUSH EAX*/ + addbyte(0x53); /*PUSH EBX*/ + addbyte(0xe8); /*CALL writememll*/ + addlong((uint32_t)writememll - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 12*/ + addbyte(0xc4); + addbyte(12); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); +
addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x75); /*JNE +1: skip the RET and run the fatal() call below*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + addbyte(0xc7); /*MOV [ESP], gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err (this routine's own message, not the word variant's)*/ + addbyte(0x04); + addbyte(0x24); + addlong(gen_MEM_STORE_ADDR_EA_L_NO_ABRT_err); + addbyte(0xe8); /*CALL fatal*/ + addlong((uint32_t)fatal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + /*Should not return!*/ + + return addr; +} + +static uint32_t gen_MEM_CHECK_WRITE() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + /*Emits a routine at the current block position and returns its start address. Routine inputs: seg = ESI, addr = EAX. The routine returns at once when bit 31 of cr0 is clear, or when seg is not -1 and writelookup2[(seg+addr)>>12] is not -1; otherwise it calls mmutranslatereal(seg+addr, 1) and jumps to mem_abrt_rout if abrt is non-zero*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +1: skip the RET when bit 31 of cr0 is set*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x74); /*JE slowpath (11 = 8-byte CMP + 2-byte JE + RET)*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +1: skip the RET into the slowpath*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal*/ + addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x83); /*ADD ESP, 8*/ + addbyte(0xc4); + addbyte(8); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + addbyte(0xc3); /*RET*/ + + return addr; +} + +/*static void checkdebug(uint32_t a) +{ + pclog("checkdebug %08x\n", a); +}*/ + +static uint32_t gen_MEM_CHECK_WRITE_W() +{ + uint32_t addr =
&codeblock[block_current].data[block_pos]; + + /*Emits a routine at the current block position and returns its start address. Routine inputs: seg = ESI, addr = EAX. The routine returns at once when bit 31 of cr0 is clear, or when writelookup2 holds entries other than -1 for both (seg+addr)>>12 and (seg+addr+1)>>12; the slowpath calls mmutranslatereal(linear, 1), jumps to mem_abrt_rout if abrt is non-zero, and loops back for the next page when the advanced address has its low 12 bits clear*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +1: skip the RET when bit 31 of cr0 is set*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 1[EDI]*/ + addbyte(0x77); + addbyte(0x01); + addbyte(0x74); /*JE slowpath. NOTE(review): displacement 11 matches gen_MEM_CHECK_WRITE, but here the slowpath label is 29 bytes past this jump, so 11 lands inside the following CMP - confirm the intended target*/ + addbyte(11); + addbyte(0x89); /*MOV EAX, EDI*/ + addbyte(0xf8); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE slowpath (11 = 8-byte CMP + 2-byte JE + RET)*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[ESI*4],-1*/ + addbyte(0x3c); + addbyte(0xb5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +1: skip the RET into the slowpath*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x89); /*MOV EDI, EAX*/ + addbyte(0xc7); + /*slowpath_lp:*/ + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal*/ + addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x83); /*ADD ESP, 4*/ + addbyte(0xc4); + addbyte(4); + addbyte(0x83); /*ADD EDI, 1*/ + addbyte(0xc7); + addbyte(1); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 0-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST $fff, EDI*/ + addbyte(0xc7); + addlong(0xfff); + addbyte(0x74); /*JE slowpath_lp (-33 = the 33 bytes emitted from PUSH 1 through this JE)*/ + addbyte(-33); + addbyte(0xc3); /*RET*/ + + return addr; +} + +static
uint32_t gen_MEM_CHECK_WRITE_L() +{ + uint32_t addr = &codeblock[block_current].data[block_pos]; + + /*Emits a routine at the current block position and returns its start address. Routine inputs: seg = ESI, addr = EAX. The routine returns at once when bit 31 of cr0 is clear, or when writelookup2 holds entries other than -1 for both (seg+addr)>>12 and (seg+addr+3)>>12; the slowpath calls mmutranslatereal(linear, 1), jumps to mem_abrt_rout if abrt is non-zero, and loops back while the advanced address has bits 2-11 clear*/ + + addbyte(0x8d); /*LEA EDI, [EAX+ESI]*/ + addbyte(0x3c); + addbyte(0x30); + addbyte(0x83); /*CMP cr0, 0*/ + addbyte(0x3d); + addlong((uint32_t)&cr0); + addbyte(0); + addbyte(0x78); /*JS +1: skip the RET when bit 31 of cr0 is set*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + addbyte(0x83); /*CMP ESI, -1*/ + addbyte(0xfe); + addbyte(-1); + addbyte(0x8d); /*LEA ESI, 3[EDI]*/ + addbyte(0x77); + addbyte(0x03); + addbyte(0x74); /*JE slowpath. NOTE(review): displacement 11 matches gen_MEM_CHECK_WRITE, but here the slowpath label is 29 bytes past this jump, so 11 lands inside the following CMP - confirm the intended target*/ + addbyte(11); + addbyte(0x89); /*MOV EAX, EDI*/ + addbyte(0xf8); + addbyte(0xc1); /*SHR EDI, 12*/ + addbyte(0xef); + addbyte(12); + addbyte(0xc1); /*SHR ESI, 12*/ + addbyte(0xee); + addbyte(12); + addbyte(0x83); /*CMP writelookup2[EDI*4],-1*/ + addbyte(0x3c); + addbyte(0xbd); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE slowpath (11 = 8-byte CMP + 2-byte JE + RET)*/ + addbyte(11); + addbyte(0x83); /*CMP writelookup2[ESI*4],-1*/ + addbyte(0x3c); + addbyte(0xb5); + addlong((uint32_t)writelookup2); + addbyte(-1); + addbyte(0x74); /*JE +1: skip the RET into the slowpath*/ + addbyte(1); + addbyte(0xc3); /*RET*/ + + /*slowpath:*/ + addbyte(0x89); /*MOV EDI, EAX*/ + addbyte(0xc7); + /*slowpath_lp:*/ + addbyte(0x6a); /*PUSH 1*/ + addbyte(1); + addbyte(0x57); /*PUSH EDI*/ + addbyte(0xe8); /*CALL mmutranslatereal*/ + addlong((uint32_t)mmutranslatereal - (uint32_t)(&codeblock[block_current].data[block_pos + 4])); + addbyte(0x5f); /*POP EDI*/ + addbyte(0x83); /*ADD ESP, 4*/ + addbyte(0xc4); + addbyte(4); + addbyte(0x83); /*ADD EDI, 3*/ + addbyte(0xc7); + addbyte(3); + addbyte(0x80); /*CMP abrt, 0*/ + addbyte(0x7d); + addbyte(cpu_state_offset(abrt)); + addbyte(0); + addbyte(0x0f); /*JNE mem_abrt_rout*/ + addbyte(0x85); + addlong(mem_abrt_rout - ((uint32_t)(&codeblock[block_current].data[block_pos]) + 4)); + /*If bits 2-11 of the address are now 0 then this crosses a page, so loop back*/ + addbyte(0xf7); /*TEST EDI, FFC*/ + addbyte(0xc7); + addlong(0xffc); + addbyte(0x74); /*JE slowpath_lp (-33 = the 33 bytes emitted from PUSH 1 through this JE)*/ + addbyte(-33); +
addbyte(0xc3); /*RET*/ + + return addr; +} + void codegen_init() { int c; @@ -547,22 +1154,40 @@ void codegen_init() addbyte(0x5d); /*POP EBP*/ addbyte(0x5b); /*POP EDX*/ addbyte(0xC3); /*RET*/ - block_pos = 128; + block_pos = (block_pos + 15) & ~15; mem_load_addr_ea_l = gen_MEM_LOAD_ADDR_EA_L(); - block_pos = 256; + block_pos = (block_pos + 15) & ~15; mem_load_addr_ea_w = gen_MEM_LOAD_ADDR_EA_W(); - block_pos = 384; + block_pos = (block_pos + 15) & ~15; mem_load_addr_ea_b = gen_MEM_LOAD_ADDR_EA_B(); - block_pos = 512; + block_pos = (block_pos + 15) & ~15; mem_load_addr_ea_q = gen_MEM_LOAD_ADDR_EA_Q(); - block_pos = 640; + block_pos = (block_pos + 15) & ~15; mem_store_addr_ea_l = gen_MEM_STORE_ADDR_EA_L(); - block_pos = 768; + block_pos = (block_pos + 15) & ~15; mem_store_addr_ea_w = gen_MEM_STORE_ADDR_EA_W(); - block_pos = 896; + block_pos = (block_pos + 15) & ~15; mem_store_addr_ea_b = gen_MEM_STORE_ADDR_EA_B(); - block_pos = 1024; + block_pos = (block_pos + 15) & ~15; mem_store_addr_ea_q = gen_MEM_STORE_ADDR_EA_Q(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_b_no_abrt = gen_MEM_LOAD_ADDR_EA_B_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_b_no_abrt = gen_MEM_STORE_ADDR_EA_B_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_w_no_abrt = gen_MEM_LOAD_ADDR_EA_W_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_w_no_abrt = gen_MEM_STORE_ADDR_EA_W_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_load_addr_ea_l_no_abrt = gen_MEM_LOAD_ADDR_EA_L_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_store_addr_ea_l_no_abrt = gen_MEM_STORE_ADDR_EA_L_NO_ABRT(); + block_pos = (block_pos + 15) & ~15; + mem_check_write = gen_MEM_CHECK_WRITE(); + block_pos = (block_pos + 15) & ~15; + mem_check_write_w = gen_MEM_CHECK_WRITE_W(); + block_pos = (block_pos + 15) & ~15; + mem_check_write_l = gen_MEM_CHECK_WRITE_L(); asm( "fstcw %0\n" diff --git a/src/codegen_x86.h b/src/codegen_x86.h index
e1d2da768..3a3662d32 100644 --- a/src/codegen_x86.h +++ b/src/codegen_x86.h @@ -10,6 +10,8 @@ #define BLOCK_EXIT_OFFSET 0x7f0 #define BLOCK_GPF_OFFSET (BLOCK_EXIT_OFFSET - 20) +#define BLOCK_MAX 1720 /*block_pos limit: addbyte()/addword()/addlong()/addquad() call CPU_BLOCK_END() once block_pos reaches this value*/ + enum { OP_RET = 0xc3 @@ -28,3 +30,13 @@ extern uint32_t mem_store_addr_ea_b; extern uint32_t mem_store_addr_ea_w; extern uint32_t mem_store_addr_ea_l; extern uint32_t mem_store_addr_ea_q; +/*Start addresses of the routines that codegen_init() emits through the matching gen_MEM_* functions*/ +extern uint32_t mem_load_addr_ea_b_no_abrt; +extern uint32_t mem_store_addr_ea_b_no_abrt; +extern uint32_t mem_load_addr_ea_w_no_abrt; +extern uint32_t mem_store_addr_ea_w_no_abrt; +extern uint32_t mem_load_addr_ea_l_no_abrt; +extern uint32_t mem_store_addr_ea_l_no_abrt; +extern uint32_t mem_check_write; +extern uint32_t mem_check_write_w; +extern uint32_t mem_check_write_l;