Files
86Box-fork/src/codegen_new/codegen_reg.c

924 lines
39 KiB
C

#include <stdint.h>
#include <86box/86box.h>
#include "cpu.h"
#include <86box/mem.h>
#include "codegen.h"
#include "codegen_backend.h"
#include "codegen_ir_defs.h"
#include "codegen_reg.h"
int max_version_refcount;
uint16_t reg_dead_list = 0;
uint8_t reg_last_version[IREG_COUNT];
reg_version_t reg_version[IREG_COUNT][256];
ir_reg_t invalid_ir_reg = {IREG_INVALID};
ir_reg_t _host_regs[CODEGEN_HOST_REGS];
static uint8_t _host_reg_dirty[CODEGEN_HOST_REGS];
ir_reg_t host_fp_regs[CODEGEN_HOST_FP_REGS];
static uint8_t host_fp_reg_dirty[CODEGEN_HOST_FP_REGS];
typedef struct host_reg_set_t
{
ir_reg_t *regs;
uint8_t *dirty;
host_reg_def_t *reg_list;
uint16_t locked;
int nr_regs;
} host_reg_set_t;
static host_reg_set_t host_reg_set, host_fp_reg_set;
enum
{
REG_BYTE,
REG_WORD,
REG_DWORD,
REG_QWORD,
REG_POINTER,
REG_DOUBLE,
REG_FPU_ST_BYTE,
REG_FPU_ST_DOUBLE,
REG_FPU_ST_QWORD
};
enum
{
REG_INTEGER,
REG_FP
};
enum
{
/*Register may be accessed outside of code block, and must be written
back before any control transfers*/
REG_PERMANENT = 0,
/*Register will not be accessed outside of code block, and does not need
to be written back if there are no readers remaining*/
REG_VOLATILE = 1
};
struct
{
int native_size;
void *p;
int type;
int is_volatile;
} ireg_data[IREG_COUNT] =
{
[IREG_EAX] = {REG_DWORD, &EAX, REG_INTEGER, REG_PERMANENT},
[IREG_ECX] = {REG_DWORD, &ECX, REG_INTEGER, REG_PERMANENT},
[IREG_EDX] = {REG_DWORD, &EDX, REG_INTEGER, REG_PERMANENT},
[IREG_EBX] = {REG_DWORD, &EBX, REG_INTEGER, REG_PERMANENT},
[IREG_ESP] = {REG_DWORD, &ESP, REG_INTEGER, REG_PERMANENT},
[IREG_EBP] = {REG_DWORD, &EBP, REG_INTEGER, REG_PERMANENT},
[IREG_ESI] = {REG_DWORD, &ESI, REG_INTEGER, REG_PERMANENT},
[IREG_EDI] = {REG_DWORD, &EDI, REG_INTEGER, REG_PERMANENT},
[IREG_flags_op] = {REG_DWORD, &cpu_state.flags_op, REG_INTEGER, REG_PERMANENT},
[IREG_flags_res] = {REG_DWORD, &cpu_state.flags_res, REG_INTEGER, REG_PERMANENT},
[IREG_flags_op1] = {REG_DWORD, &cpu_state.flags_op1, REG_INTEGER, REG_PERMANENT},
[IREG_flags_op2] = {REG_DWORD, &cpu_state.flags_op2, REG_INTEGER, REG_PERMANENT},
[IREG_pc] = {REG_DWORD, &cpu_state.pc, REG_INTEGER, REG_PERMANENT},
[IREG_oldpc] = {REG_DWORD, &cpu_state.oldpc, REG_INTEGER, REG_PERMANENT},
[IREG_eaaddr] = {REG_DWORD, &cpu_state.eaaddr, REG_INTEGER, REG_PERMANENT},
[IREG_ea_seg] = {REG_POINTER, &cpu_state.ea_seg, REG_INTEGER, REG_PERMANENT},
[IREG_op32] = {REG_DWORD, &cpu_state.op32, REG_INTEGER, REG_PERMANENT},
[IREG_ssegsx] = {REG_BYTE, &cpu_state.ssegs, REG_INTEGER, REG_PERMANENT},
[IREG_rm_mod_reg] = {REG_DWORD, &cpu_state.rm_data.rm_mod_reg_data, REG_INTEGER, REG_PERMANENT},
#ifdef USE_ACYCS
[IREG_acycs] = {REG_DWORD, &acycs, REG_INTEGER, REG_PERMANENT},
#endif
[IREG_cycles] = {REG_DWORD, &cpu_state._cycles, REG_INTEGER, REG_PERMANENT},
[IREG_CS_base] = {REG_DWORD, &cpu_state.seg_cs.base, REG_INTEGER, REG_PERMANENT},
[IREG_DS_base] = {REG_DWORD, &cpu_state.seg_ds.base, REG_INTEGER, REG_PERMANENT},
[IREG_ES_base] = {REG_DWORD, &cpu_state.seg_es.base, REG_INTEGER, REG_PERMANENT},
[IREG_FS_base] = {REG_DWORD, &cpu_state.seg_fs.base, REG_INTEGER, REG_PERMANENT},
[IREG_GS_base] = {REG_DWORD, &cpu_state.seg_gs.base, REG_INTEGER, REG_PERMANENT},
[IREG_SS_base] = {REG_DWORD, &cpu_state.seg_ss.base, REG_INTEGER, REG_PERMANENT},
[IREG_CS_seg] = {REG_WORD, &cpu_state.seg_cs.seg, REG_INTEGER, REG_PERMANENT},
[IREG_DS_seg] = {REG_WORD, &cpu_state.seg_ds.seg, REG_INTEGER, REG_PERMANENT},
[IREG_ES_seg] = {REG_WORD, &cpu_state.seg_es.seg, REG_INTEGER, REG_PERMANENT},
[IREG_FS_seg] = {REG_WORD, &cpu_state.seg_fs.seg, REG_INTEGER, REG_PERMANENT},
[IREG_GS_seg] = {REG_WORD, &cpu_state.seg_gs.seg, REG_INTEGER, REG_PERMANENT},
[IREG_SS_seg] = {REG_WORD, &cpu_state.seg_ss.seg, REG_INTEGER, REG_PERMANENT},
[IREG_FPU_TOP] = {REG_DWORD, &cpu_state.TOP, REG_INTEGER, REG_PERMANENT},
[IREG_ST0] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST1] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST2] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST3] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST4] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST5] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST6] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_ST7] = {REG_FPU_ST_DOUBLE, &cpu_state.ST[0], REG_FP, REG_PERMANENT},
[IREG_tag0] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag1] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag2] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag3] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag4] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag5] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag6] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_tag7] = {REG_FPU_ST_BYTE, &cpu_state.tag[0], REG_INTEGER, REG_PERMANENT},
[IREG_ST0_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST1_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST2_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST3_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST4_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST5_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST6_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_ST7_i64] = {REG_FPU_ST_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_MM0x] = {REG_QWORD, &cpu_state.MM[0], REG_FP, REG_PERMANENT},
[IREG_MM1x] = {REG_QWORD, &cpu_state.MM[1], REG_FP, REG_PERMANENT},
[IREG_MM2x] = {REG_QWORD, &cpu_state.MM[2], REG_FP, REG_PERMANENT},
[IREG_MM3x] = {REG_QWORD, &cpu_state.MM[3], REG_FP, REG_PERMANENT},
[IREG_MM4x] = {REG_QWORD, &cpu_state.MM[4], REG_FP, REG_PERMANENT},
[IREG_MM5x] = {REG_QWORD, &cpu_state.MM[5], REG_FP, REG_PERMANENT},
[IREG_MM6x] = {REG_QWORD, &cpu_state.MM[6], REG_FP, REG_PERMANENT},
[IREG_MM7x] = {REG_QWORD, &cpu_state.MM[7], REG_FP, REG_PERMANENT},
[IREG_NPXCx] = {REG_WORD, &cpu_state.npxc, REG_INTEGER, REG_PERMANENT},
[IREG_NPXSx] = {REG_WORD, &cpu_state.npxs, REG_INTEGER, REG_PERMANENT},
[IREG_flagsx] = {REG_WORD, &cpu_state.flags, REG_INTEGER, REG_PERMANENT},
[IREG_eflagsx] = {REG_WORD, &cpu_state.eflags, REG_INTEGER, REG_PERMANENT},
[IREG_CS_limit_low] = {REG_DWORD, &cpu_state.seg_cs.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_DS_limit_low] = {REG_DWORD, &cpu_state.seg_ds.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_ES_limit_low] = {REG_DWORD, &cpu_state.seg_es.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_FS_limit_low] = {REG_DWORD, &cpu_state.seg_fs.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_GS_limit_low] = {REG_DWORD, &cpu_state.seg_gs.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_SS_limit_low] = {REG_DWORD, &cpu_state.seg_ss.limit_low, REG_INTEGER, REG_PERMANENT},
[IREG_CS_limit_high] = {REG_DWORD, &cpu_state.seg_cs.limit_high, REG_INTEGER, REG_PERMANENT},
[IREG_DS_limit_high] = {REG_DWORD, &cpu_state.seg_ds.limit_high, REG_INTEGER, REG_PERMANENT},
[IREG_ES_limit_high] = {REG_DWORD, &cpu_state.seg_es.limit_high, REG_INTEGER, REG_PERMANENT},
[IREG_FS_limit_high] = {REG_DWORD, &cpu_state.seg_fs.limit_high, REG_INTEGER, REG_PERMANENT},
[IREG_GS_limit_high] = {REG_DWORD, &cpu_state.seg_gs.limit_high, REG_INTEGER, REG_PERMANENT},
[IREG_SS_limit_high] = {REG_DWORD, &cpu_state.seg_ss.limit_high, REG_INTEGER, REG_PERMANENT},
/*Temporary registers are stored on the stack, and are not guaranteed to
be preserved across uOPs. They will not be written back if they will
not be read again.*/
[IREG_temp0] = {REG_DWORD, (void *)16, REG_INTEGER, REG_VOLATILE},
[IREG_temp1] = {REG_DWORD, (void *)20, REG_INTEGER, REG_VOLATILE},
[IREG_temp2] = {REG_DWORD, (void *)24, REG_INTEGER, REG_VOLATILE},
[IREG_temp3] = {REG_DWORD, (void *)28, REG_INTEGER, REG_VOLATILE},
[IREG_temp0d] = {REG_DOUBLE, (void *)40, REG_FP, REG_VOLATILE},
[IREG_temp1d] = {REG_DOUBLE, (void *)48, REG_FP, REG_VOLATILE},
};
void codegen_reg_mark_as_required()
{
int reg;
for (reg = 0; reg < IREG_COUNT; reg++)
{
int last_version = reg_last_version[reg];
if (last_version > 0 && ireg_data[reg].is_volatile == REG_PERMANENT)
reg_version[reg][last_version].flags |= REG_FLAGS_REQUIRED;
}
}
int reg_is_native_size(ir_reg_t ir_reg)
{
int native_size = ireg_data[IREG_GET_REG(ir_reg.reg)].native_size;
int requested_size = IREG_GET_SIZE(ir_reg.reg);
switch (native_size)
{
case REG_BYTE: case REG_FPU_ST_BYTE:
return (requested_size == IREG_SIZE_B);
case REG_WORD:
return (requested_size == IREG_SIZE_W);
case REG_DWORD:
return (requested_size == IREG_SIZE_L);
case REG_QWORD: case REG_FPU_ST_QWORD: case REG_DOUBLE: case REG_FPU_ST_DOUBLE:
return ((requested_size == IREG_SIZE_D) || (requested_size == IREG_SIZE_Q));
case REG_POINTER:
if (sizeof(void *) == 4)
return (requested_size == IREG_SIZE_L);
return (requested_size == IREG_SIZE_Q);
default:
fatal("get_reg_is_native_size: unknown native size %i\n", native_size);
}
return 0;
}
void codegen_reg_reset()
{
int c;
host_reg_set.regs = _host_regs;
host_reg_set.dirty = _host_reg_dirty;
host_reg_set.reg_list = codegen_host_reg_list;
host_reg_set.locked = 0;
host_reg_set.nr_regs = CODEGEN_HOST_REGS;
host_fp_reg_set.regs = host_fp_regs;
host_fp_reg_set.dirty = host_fp_reg_dirty;
host_fp_reg_set.reg_list = codegen_host_fp_reg_list;
host_fp_reg_set.locked = 0;
host_fp_reg_set.nr_regs = CODEGEN_HOST_FP_REGS;
for (c = 0; c < IREG_COUNT; c++)
{
reg_last_version[c] = 0;
reg_version[c][0].refcount = 0;
}
for (c = 0; c < CODEGEN_HOST_REGS; c++)
{
host_reg_set.regs[c] = invalid_ir_reg;
host_reg_set.dirty[c] = 0;
}
for (c = 0; c < CODEGEN_HOST_FP_REGS; c++)
{
host_fp_reg_set.regs[c] = invalid_ir_reg;
host_fp_reg_set.dirty[c] = 0;
}
reg_dead_list = 0;
max_version_refcount = 0;
}
static inline int ir_get_refcount(ir_reg_t ir_reg)
{
return reg_version[IREG_GET_REG(ir_reg.reg)][ir_reg.version].refcount;
}
static inline host_reg_set_t *get_reg_set(ir_reg_t ir_reg)
{
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type == REG_INTEGER)
return &host_reg_set;
else
return &host_fp_reg_set;
}
static void codegen_reg_load(host_reg_set_t *reg_set, codeblock_t *block, int c, ir_reg_t ir_reg)
{
switch (ireg_data[IREG_GET_REG(ir_reg.reg)].native_size)
{
case REG_WORD:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_INTEGER)
fatal("codegen_reg_load - REG_WORD !REG_INTEGER\n");
#endif
if ((uintptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p < 256)
codegen_direct_read_16_stack(block, reg_set->reg_list[c].reg, (intptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p);
else
codegen_direct_read_16(block, reg_set->reg_list[c].reg, ireg_data[IREG_GET_REG(ir_reg.reg)].p);
break;
case REG_DWORD:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_INTEGER)
fatal("codegen_reg_load - REG_DWORD !REG_INTEGER\n");
#endif
if ((uintptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p < 256)
codegen_direct_read_32_stack(block, reg_set->reg_list[c].reg, (intptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p);
else
codegen_direct_read_32(block, reg_set->reg_list[c].reg, ireg_data[IREG_GET_REG(ir_reg.reg)].p);
break;
case REG_QWORD:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_FP)
fatal("codegen_reg_load - REG_QWORD !REG_FP\n");
#endif
if ((uintptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p < 256)
codegen_direct_read_64_stack(block, reg_set->reg_list[c].reg, (intptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p);
else
codegen_direct_read_64(block, reg_set->reg_list[c].reg, ireg_data[IREG_GET_REG(ir_reg.reg)].p);
break;
case REG_POINTER:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_INTEGER)
fatal("codegen_reg_load - REG_POINTER !REG_INTEGER\n");
#endif
if ((uintptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p < 256)
codegen_direct_read_pointer_stack(block, reg_set->reg_list[c].reg, (intptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p);
else
codegen_direct_read_pointer(block, reg_set->reg_list[c].reg, ireg_data[IREG_GET_REG(ir_reg.reg)].p);
break;
case REG_DOUBLE:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_FP)
fatal("codegen_reg_load - REG_DOUBLE !REG_FP\n");
#endif
if ((uintptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p < 256)
codegen_direct_read_double_stack(block, reg_set->reg_list[c].reg, (intptr_t)ireg_data[IREG_GET_REG(ir_reg.reg)].p);
else
codegen_direct_read_double(block, reg_set->reg_list[c].reg, ireg_data[IREG_GET_REG(ir_reg.reg)].p);
break;
case REG_FPU_ST_BYTE:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_INTEGER)
fatal("codegen_reg_load - REG_FPU_ST_BYTE !REG_INTEGER\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_read_8(block, reg_set->reg_list[c].reg, &cpu_state.tag[ir_reg.reg & 7]);
else
codegen_direct_read_st_8(block, reg_set->reg_list[c].reg, &cpu_state.tag[0], ir_reg.reg & 7);
break;
case REG_FPU_ST_QWORD:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_FP)
fatal("codegen_reg_load - REG_FPU_ST_QWORD !REG_FP\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_read_64(block, reg_set->reg_list[c].reg, &cpu_state.MM[ir_reg.reg & 7]);
else
codegen_direct_read_st_64(block, reg_set->reg_list[c].reg, &cpu_state.MM[0], ir_reg.reg & 7);
break;
case REG_FPU_ST_DOUBLE:
#ifndef RELEASE_BUILD
if (ireg_data[IREG_GET_REG(ir_reg.reg)].type != REG_FP)
fatal("codegen_reg_load - REG_FPU_ST_DOUBLE !REG_FP\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_read_double(block, reg_set->reg_list[c].reg, &cpu_state.ST[ir_reg.reg & 7]);
else
codegen_direct_read_st_double(block, reg_set->reg_list[c].reg, &cpu_state.ST[0], ir_reg.reg & 7);
break;
default:
fatal("codegen_reg_load - native_size=%i reg=%i\n", ireg_data[IREG_GET_REG(ir_reg.reg)].native_size, IREG_GET_REG(ir_reg.reg));
}
reg_set->regs[c] = ir_reg;
}
static void codegen_reg_writeback(host_reg_set_t *reg_set, codeblock_t *block, int c, int invalidate)
{
int ir_reg = IREG_GET_REG(reg_set->regs[c].reg);
void *p = ireg_data[ir_reg].p;
if (!reg_version[ir_reg][reg_set->regs[c].version].refcount &&
ireg_data[ir_reg].is_volatile)
return;
switch (ireg_data[ir_reg].native_size)
{
case REG_BYTE:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_INTEGER)
fatal("codegen_reg_writeback - REG_BYTE !REG_INTEGER\n");
if ((uintptr_t)p < 256)
fatal("codegen_reg_writeback - REG_BYTE %p\n", p);
#endif
codegen_direct_write_8(block, p, reg_set->reg_list[c].reg);
break;
case REG_WORD:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_INTEGER)
fatal("codegen_reg_writeback - REG_WORD !REG_INTEGER\n");
if ((uintptr_t)p < 256)
fatal("codegen_reg_writeback - REG_WORD %p\n", p);
#endif
codegen_direct_write_16(block, p, reg_set->reg_list[c].reg);
break;
case REG_DWORD:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_INTEGER)
fatal("codegen_reg_writeback - REG_DWORD !REG_INTEGER\n");
#endif
if ((uintptr_t)p < 256)
codegen_direct_write_32_stack(block, (intptr_t)p, reg_set->reg_list[c].reg);
else
codegen_direct_write_32(block, p, reg_set->reg_list[c].reg);
break;
case REG_QWORD:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_FP)
fatal("codegen_reg_writeback - REG_QWORD !REG_FP\n");
#endif
if ((uintptr_t)p < 256)
codegen_direct_write_64_stack(block, (intptr_t)p, reg_set->reg_list[c].reg);
else
codegen_direct_write_64(block, p, reg_set->reg_list[c].reg);
break;
case REG_POINTER:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_INTEGER)
fatal("codegen_reg_writeback - REG_POINTER !REG_INTEGER\n");
if ((uintptr_t)p < 256)
fatal("codegen_reg_writeback - REG_POINTER %p\n", p);
#endif
codegen_direct_write_ptr(block, p, reg_set->reg_list[c].reg);
break;
case REG_DOUBLE:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_FP)
fatal("codegen_reg_writeback - REG_DOUBLE !REG_FP\n");
#endif
if ((uintptr_t)p < 256)
codegen_direct_write_double_stack(block, (intptr_t)p, reg_set->reg_list[c].reg);
else
codegen_direct_write_double(block, p, reg_set->reg_list[c].reg);
break;
case REG_FPU_ST_BYTE:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_INTEGER)
fatal("codegen_reg_writeback - REG_FPU_ST_BYTE !REG_INTEGER\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_write_8(block, &cpu_state.tag[reg_set->regs[c].reg & 7], reg_set->reg_list[c].reg);
else
codegen_direct_write_st_8(block, &cpu_state.tag[0], reg_set->regs[c].reg & 7, reg_set->reg_list[c].reg);
break;
case REG_FPU_ST_QWORD:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_FP)
fatal("codegen_reg_writeback - REG_FPU_ST_QWORD !REG_FP\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_write_64(block, &cpu_state.MM[reg_set->regs[c].reg & 7], reg_set->reg_list[c].reg);
else
codegen_direct_write_st_64(block, &cpu_state.MM[0], reg_set->regs[c].reg & 7, reg_set->reg_list[c].reg);
break;
case REG_FPU_ST_DOUBLE:
#ifndef RELEASE_BUILD
if (ireg_data[ir_reg].type != REG_FP)
fatal("codegen_reg_writeback - REG_FPU_ST_DOUBLE !REG_FP\n");
#endif
if (block->flags & CODEBLOCK_STATIC_TOP)
codegen_direct_write_double(block, &cpu_state.ST[reg_set->regs[c].reg & 7], reg_set->reg_list[c].reg);
else
codegen_direct_write_st_double(block, &cpu_state.ST[0], reg_set->regs[c].reg & 7, reg_set->reg_list[c].reg);
break;
default:
fatal("codegen_reg_flush - native_size=%i\n", ireg_data[ir_reg].native_size);
}
if (invalidate)
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
#ifdef CODEGEN_BACKEND_HAS_MOV_IMM
void codegen_reg_write_imm(codeblock_t *block, ir_reg_t ir_reg, uint32_t imm_data)
{
int reg_idx = IREG_GET_REG(ir_reg.reg);
void *p = ireg_data[reg_idx].p;
switch (ireg_data[reg_idx].native_size)
{
case REG_BYTE:
#ifndef RELEASE_BUILD
if ((uintptr_t)p < 256)
fatal("codegen_reg_write_imm - REG_BYTE %p\n", p);
#endif
codegen_direct_write_8_imm(block, p, imm_data);
break;
case REG_WORD:
#ifndef RELEASE_BUILD
if ((uintptr_t)p < 256)
fatal("codegen_reg_write_imm - REG_WORD %p\n", p);
#endif
codegen_direct_write_16_imm(block, p, imm_data);
break;
case REG_DWORD:
if ((uintptr_t)p < 256)
codegen_direct_write_32_imm_stack(block, (int)p, imm_data);
else
codegen_direct_write_32_imm(block, p, imm_data);
break;
case REG_POINTER:
case REG_QWORD:
case REG_DOUBLE:
case REG_FPU_ST_BYTE:
case REG_FPU_ST_QWORD:
case REG_FPU_ST_DOUBLE:
default:
fatal("codegen_reg_write_imm - native_size=%i\n", ireg_data[reg_idx].native_size);
}
}
#endif
static void alloc_reg(ir_reg_t ir_reg)
{
host_reg_set_t *reg_set = get_reg_set(ir_reg);
int nr_regs = (reg_set == &host_reg_set) ? CODEGEN_HOST_REGS : CODEGEN_HOST_FP_REGS;
int c;
for (c = 0; c < nr_regs; c++)
{
if (IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg))
{
#ifndef RELEASE_BUILD
if (reg_set->regs[c].version != ir_reg.version)
fatal("alloc_reg - host_regs[c].version != ir_reg.version %i %p %p %i %i\n", c, reg_set, &host_reg_set, reg_set->regs[c].reg, ir_reg.reg);
#endif
reg_set->locked |= (1 << c);
return;
}
}
}
static void alloc_dest_reg(ir_reg_t ir_reg, int dest_reference)
{
host_reg_set_t *reg_set = get_reg_set(ir_reg);
int nr_regs = (reg_set == &host_reg_set) ? CODEGEN_HOST_REGS : CODEGEN_HOST_FP_REGS;
int c;
for (c = 0; c < nr_regs; c++)
{
if (IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg))
{
if (reg_set->regs[c].version == ir_reg.version)
{
reg_set->locked |= (1 << c);
}
else
{
/*The immediate prior version may have been
optimised out, so search backwards to find the
last valid version*/
int prev_version = ir_reg.version-1;
while (prev_version >= 0)
{
reg_version_t *regv = &reg_version[IREG_GET_REG(reg_set->regs[c].reg)][prev_version];
if (!(regv->flags & REG_FLAGS_DEAD) && regv->refcount == dest_reference)
{
reg_set->locked |= (1 << c);
return;
}
prev_version--;
}
fatal("codegen_reg_alloc_register - host_regs[c].version != dest_reg_a.version %i,%i %i\n", reg_set->regs[c].version, ir_reg.version, dest_reference);
}
return;
}
}
}
void codegen_reg_alloc_register(ir_reg_t dest_reg_a, ir_reg_t src_reg_a, ir_reg_t src_reg_b, ir_reg_t src_reg_c)
{
int dest_reference = 0;
host_reg_set.locked = 0;
host_fp_reg_set.locked = 0;
if (!ir_reg_is_invalid(dest_reg_a))
{
if (!ir_reg_is_invalid(src_reg_a) && IREG_GET_REG(src_reg_a.reg) == IREG_GET_REG(dest_reg_a.reg) && src_reg_a.version == dest_reg_a.version-1)
dest_reference++;
if (!ir_reg_is_invalid(src_reg_b) && IREG_GET_REG(src_reg_b.reg) == IREG_GET_REG(dest_reg_a.reg) && src_reg_b.version == dest_reg_a.version-1)
dest_reference++;
if (!ir_reg_is_invalid(src_reg_c) && IREG_GET_REG(src_reg_c.reg) == IREG_GET_REG(dest_reg_a.reg) && src_reg_c.version == dest_reg_a.version-1)
dest_reference++;
}
if (!ir_reg_is_invalid(src_reg_a))
alloc_reg(src_reg_a);
if (!ir_reg_is_invalid(src_reg_b))
alloc_reg(src_reg_b);
if (!ir_reg_is_invalid(src_reg_c))
alloc_reg(src_reg_c);
if (!ir_reg_is_invalid(dest_reg_a))
alloc_dest_reg(dest_reg_a, dest_reference);
}
ir_host_reg_t codegen_reg_alloc_read_reg(codeblock_t *block, ir_reg_t ir_reg, int *host_reg_idx)
{
host_reg_set_t *reg_set = get_reg_set(ir_reg);
int c;
/*Search for required register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg) && reg_set->regs[c].version == ir_reg.version)
break;
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg) && reg_set->regs[c].version <= ir_reg.version)
{
reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount++;
break;
}
#ifndef RELEASE_BUILD
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg) && reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount)
fatal("codegen_reg_alloc_read_reg - version mismatch!\n");
#endif
}
if (c == reg_set->nr_regs)
{
/*No unused registers. Search for an unlocked register with no pending reads*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!(reg_set->locked & (1 << c)) && IREG_GET_REG(reg_set->regs[c].reg) != IREG_INVALID && !ir_get_refcount(reg_set->regs[c]))
break;
}
if (c == reg_set->nr_regs)
{
/*Search for any unlocked register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!(reg_set->locked & (1 << c)))
break;
}
#ifndef RELEASE_BUILD
if (c == reg_set->nr_regs)
fatal("codegen_reg_alloc_read_reg - out of registers\n");
#endif
}
if (reg_set->dirty[c])
codegen_reg_writeback(reg_set, block, c, 1);
codegen_reg_load(reg_set, block, c, ir_reg);
reg_set->locked |= (1 << c);
reg_set->dirty[c] = 0;
}
reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount--;
#ifndef RELEASE_BUILD
if (reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount == (uint8_t)-1)
fatal("codegen_reg_alloc_read_reg - refcount < 0\n");
#endif
if (host_reg_idx)
*host_reg_idx = c;
return reg_set->reg_list[c].reg | IREG_GET_SIZE(ir_reg.reg);
}
ir_host_reg_t codegen_reg_alloc_write_reg(codeblock_t *block, ir_reg_t ir_reg)
{
host_reg_set_t *reg_set = get_reg_set(ir_reg);
int c;
if (!reg_is_native_size(ir_reg))
{
/*Read in parent register so we can do partial accesses to it*/
ir_reg_t parent_reg;
parent_reg.reg = IREG_GET_REG(ir_reg.reg) | IREG_SIZE_L;
parent_reg.version = ir_reg.version - 1;
reg_version[IREG_GET_REG(ir_reg.reg)][ir_reg.version - 1].refcount++;
codegen_reg_alloc_read_reg(block, parent_reg, &c);
#ifndef RELEASE_BUILD
if (IREG_GET_REG(reg_set->regs[c].reg) != IREG_GET_REG(ir_reg.reg) || reg_set->regs[c].version > ir_reg.version-1)
fatal("codegen_reg_alloc_write_reg sub_reg - doesn't match %i %02x.%i %02x.%i\n", c,
reg_set->regs[c].reg,reg_set->regs[c].version,
ir_reg.reg,ir_reg.version);
#endif
reg_set->regs[c].reg = ir_reg.reg;
reg_set->regs[c].version = ir_reg.version;
reg_set->dirty[c] = 1;
return reg_set->reg_list[c].reg | IREG_GET_SIZE(ir_reg.reg);
}
/*Search for previous version in host register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg))
{
if (reg_set->regs[c].version <= ir_reg.version-1)
{
#ifndef RELEASE_BUILD
if (reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount != 0)
fatal("codegen_reg_alloc_write_reg - previous version refcount != 0\n");
#endif
break;
}
}
}
if (c == reg_set->nr_regs)
{
/*Search for unused registers*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (ir_reg_is_invalid(reg_set->regs[c]))
break;
}
if (c == reg_set->nr_regs)
{
/*No unused registers. Search for an unlocked register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!(reg_set->locked & (1 << c)))
break;
}
#ifndef RELEASE_BUILD
if (c == reg_set->nr_regs)
fatal("codegen_reg_alloc_write_reg - out of registers\n");
#endif
if (reg_set->dirty[c])
codegen_reg_writeback(reg_set, block, c, 1);
}
}
reg_set->regs[c].reg = ir_reg.reg;
reg_set->regs[c].version = ir_reg.version;
reg_set->dirty[c] = 1;
return reg_set->reg_list[c].reg | IREG_GET_SIZE(ir_reg.reg);
}
#ifdef CODEGEN_BACKEND_HAS_MOV_IMM
int codegen_reg_is_loaded(ir_reg_t ir_reg)
{
host_reg_set_t *reg_set = get_reg_set(ir_reg);
int c;
/*Search for previous version in host register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(ir_reg.reg))
{
if (reg_set->regs[c].version <= ir_reg.version-1)
{
#ifndef RELEASE_BUILD
if (reg_version[IREG_GET_REG(reg_set->regs[c].reg)][reg_set->regs[c].version].refcount != 0)
fatal("codegen_reg_alloc_write_reg - previous version refcount != 0\n");
#endif
return 1;
}
}
}
return 0;
}
#endif
void codegen_reg_rename(codeblock_t *block, ir_reg_t src, ir_reg_t dst)
{
host_reg_set_t *reg_set = get_reg_set(src);
int c;
int target;
// pclog("rename: %i.%i -> %i.%i\n", src.reg,src.version, dst.reg, dst.version);
/*Search for required register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(src.reg) && reg_set->regs[c].version == src.version)
break;
}
#ifndef RELEASE_BUILD
if (c == reg_set->nr_regs)
fatal("codegen_reg_rename: Can't find register to rename\n");
#endif
target = c;
if (reg_set->dirty[target])
codegen_reg_writeback(reg_set, block, target, 0);
reg_set->regs[target] = dst;
reg_set->dirty[target] = 1;
// pclog("renamed reg %i dest=%i.%i\n", target, dst.reg, dst.version);
/*Invalidate any stale copies of the dest register*/
for (c = 0; c < reg_set->nr_regs; c++)
{
if (c == target)
continue;
if (!ir_reg_is_invalid(reg_set->regs[c]) && IREG_GET_REG(reg_set->regs[c].reg) == IREG_GET_REG(dst.reg))
{
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
}
}
void codegen_reg_flush(ir_data_t *ir, codeblock_t *block)
{
host_reg_set_t *reg_set;
int c;
reg_set = &host_reg_set;
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && reg_set->dirty[c])
{
codegen_reg_writeback(reg_set, block, c, 0);
}
if (reg_set->reg_list[c].flags & HOST_REG_FLAG_VOLATILE)
{
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
}
reg_set = &host_fp_reg_set;
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && reg_set->dirty[c])
{
codegen_reg_writeback(reg_set, block, c, 0);
}
if (reg_set->reg_list[c].flags & HOST_REG_FLAG_VOLATILE)
{
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
}
}
void codegen_reg_flush_invalidate(ir_data_t *ir, codeblock_t *block)
{
host_reg_set_t *reg_set;
int c;
reg_set = &host_reg_set;
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && reg_set->dirty[c])
{
codegen_reg_writeback(reg_set, block, c, 1);
}
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
reg_set = &host_fp_reg_set;
for (c = 0; c < reg_set->nr_regs; c++)
{
if (!ir_reg_is_invalid(reg_set->regs[c]) && reg_set->dirty[c])
{
codegen_reg_writeback(reg_set, block, c, 1);
}
reg_set->regs[c] = invalid_ir_reg;
reg_set->dirty[c] = 0;
}
}
/*Process dead register list, and optimise out register versions and uOPs where
possible*/
void codegen_reg_process_dead_list(ir_data_t *ir)
{
while (reg_dead_list)
{
int version = reg_dead_list & 0xff;
int reg = reg_dead_list >> 8;
reg_version_t *regv = &reg_version[reg][version];
uop_t *uop = &ir->uops[regv->parent_uop];
/*Barrier uOPs should be preserved*/
if (!(uop->type & (UOP_TYPE_BARRIER | UOP_TYPE_ORDER_BARRIER)))
{
uop->type = UOP_INVALID;
/*Adjust refcounts on source registers. If these drop to
zero then those registers can be considered for removal*/
if (uop->src_reg_a.reg != IREG_INVALID)
{
reg_version_t *src_regv = &reg_version[IREG_GET_REG(uop->src_reg_a.reg)][uop->src_reg_a.version];
src_regv->refcount--;
if (!src_regv->refcount)
add_to_dead_list(src_regv, IREG_GET_REG(uop->src_reg_a.reg), uop->src_reg_a.version);
}
if (uop->src_reg_b.reg != IREG_INVALID)
{
reg_version_t *src_regv = &reg_version[IREG_GET_REG(uop->src_reg_b.reg)][uop->src_reg_b.version];
src_regv->refcount--;
if (!src_regv->refcount)
add_to_dead_list(src_regv, IREG_GET_REG(uop->src_reg_b.reg), uop->src_reg_b.version);
}
if (uop->src_reg_c.reg != IREG_INVALID)
{
reg_version_t *src_regv = &reg_version[IREG_GET_REG(uop->src_reg_c.reg)][uop->src_reg_c.version];
src_regv->refcount--;
if (!src_regv->refcount)
add_to_dead_list(src_regv, IREG_GET_REG(uop->src_reg_c.reg), uop->src_reg_c.version);
}
regv->flags |= REG_FLAGS_DEAD;
}
reg_dead_list = regv->next;
}
}