diff --git a/src/codegen/codegen_accumulate_x86-64.c b/src/codegen/codegen_accumulate_x86-64.c index 05a728ae5..d2f1e0c3f 100644 --- a/src/codegen/codegen_accumulate_x86-64.c +++ b/src/codegen/codegen_accumulate_x86-64.c @@ -47,17 +47,17 @@ void codegen_accumulate(int acc_reg, int delta) void codegen_accumulate_flush(void) { + intptr_t rip; + if (acc_regs[0].count) { - addbyte(0x55); /*push rbp*/ - addbyte(0x48); /*mov rbp,val*/ - addbyte(0xbd); - addlong((uint32_t) (acc_regs[0].dest_reg & 0xffffffffULL)); - addlong((uint32_t) (acc_regs[0].dest_reg >> 32ULL)); - addbyte(0x81); /* add d,[rbp][0],val */ - addbyte(0x45); - addbyte(0x00); + /* To reduce the size of the generated code, we take advantage of + the fact that the target offset points to _cycles within cpu_state, + so we can just use our existing infrastructure for variables + relative to cpu_state. */ + addbyte(0x81); /*ADDL $acc_regs[0].count,(_cycles)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(_cycles)); addlong(acc_regs[0].count); - addbyte(0x5d); /*pop rbp*/ } acc_regs[0].count = 0; diff --git a/src/codegen/codegen_accumulate_x86.c b/src/codegen/codegen_accumulate_x86.c index 424cc45ab..b47c643d2 100644 --- a/src/codegen/codegen_accumulate_x86.c +++ b/src/codegen/codegen_accumulate_x86.c @@ -45,9 +45,13 @@ void codegen_accumulate(int acc_reg, int delta) void codegen_accumulate_flush(void) { if (acc_regs[0].count) { - addbyte(0x81); /*ADD $acc_regs[0].count,acc_regs[0].dest*/ - addbyte(0x05); - addlong((uint32_t) acc_regs[0].dest_reg); + /* To reduce the size of the generated code, we take advantage of + the fact that the target offset points to _cycles within cpu_state, + so we can just use our existing infrastructure for variables + relative to cpu_state. */ + addbyte(0x81); /*ADDL $acc_regs[0].count,(_cycles)*/ + addbyte(0x45); + addbyte((uint8_t)cpu_state_offset(_cycles)); addlong(acc_regs[0].count); }