From fc53fceda729ce8ec9c007292d54e3e75c3cfc07 Mon Sep 17 00:00:00 2001 From: OBattler Date: Sun, 6 Nov 2022 04:50:13 +0100 Subject: [PATCH] Added AMD K6-2+ and K6-III+ 3DNow! DSP Extensions. --- src/cpu/cpu.c | 12 +++++- src/cpu/cpu.h | 15 +++---- src/cpu/x86_ops.h | 2 + src/cpu/x86_ops_3dnow.h | 87 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 107 insertions(+), 9 deletions(-) diff --git a/src/cpu/cpu.c b/src/cpu/cpu.c index 940555dd2..aea3a935d 100644 --- a/src/cpu/cpu.c +++ b/src/cpu/cpu.c @@ -68,7 +68,8 @@ enum { }; /*Addition flags returned by CPUID function 0x80000001*/ -#define CPUID_3DNOW (1UL << 31UL) +#define CPUID_3DNOW (1UL << 31UL) +#define CPUID_3DNOWE (1UL << 30UL) /* Make sure this is as low as possible. */ cpu_state_t cpu_state; @@ -1167,6 +1168,11 @@ cpu_set(void) # endif #endif + if ((cpu_s->cpu_type == CPU_K6_2P) || (cpu_s->cpu_type == CPU_K6_3P)) { + x86_opcodes_3DNOW = ops_3DNOWE; + x86_dynarec_opcodes_3DNOW = dynarec_ops_3DNOWE; + } + timing_rr = 1; /* register dest - register src */ timing_rm = 2; /* register dest - memory src */ timing_mr = 3; /* memory dest - register src */ @@ -1202,6 +1208,8 @@ cpu_set(void) cpu_features = CPU_FEATURE_RDTSC | CPU_FEATURE_MSR | CPU_FEATURE_CR4 | CPU_FEATURE_VME | CPU_FEATURE_MMX; if (cpu_s->cpu_type >= CPU_K6_2) cpu_features |= CPU_FEATURE_3DNOW; + if ((cpu_s->cpu_type == CPU_K6_2P) || (cpu_s->cpu_type == CPU_K6_3P)) + cpu_features |= CPU_FEATURE_3DNOWE; msr.fcr = (1 << 8) | (1 << 9) | (1 << 12) | (1 << 16) | (1 << 19) | (1 << 21); #if defined(DEV_BRANCH) && defined(USE_AMD_K5) cpu_CR4_mask = CR4_TSD | CR4_DE | CR4_MCE; @@ -1831,7 +1839,7 @@ cpu_CPUID(void) case 0x80000001: EAX = CPUID + 0x100; EBX = ECX = 0; - EDX = CPUID_FPU | CPUID_VME | CPUID_PSE | CPUID_TSC | CPUID_MSR | CPUID_MCE | CPUID_CMPXCHG8B | CPUID_AMDSEP | CPUID_MMX | CPUID_3DNOW; + EDX = CPUID_FPU | CPUID_VME | CPUID_PSE | CPUID_TSC | CPUID_MSR | CPUID_MCE | CPUID_CMPXCHG8B | CPUID_AMDSEP | CPUID_MMX | CPUID_3DNOW | CPUID_3DNOWE; break; case 0x80000002: /* Processor name string */ EAX = 0x2d444d41; /* AMD-K6(tm)-III P */ diff --git a/src/cpu/cpu.h b/src/cpu/cpu.h index b8e10661c..fb35c88ec 100644 --- a/src/cpu/cpu.h +++ b/src/cpu/cpu.h @@ -503,14 +503,15 @@ extern int isibm486; extern int is_nec; extern int is_rapidcad; extern int hasfpu; -#define CPU_FEATURE_RDTSC (1 << 0) -#define CPU_FEATURE_MSR (1 << 1) -#define CPU_FEATURE_MMX (1 << 2) -#define CPU_FEATURE_CR4 (1 << 3) -#define CPU_FEATURE_VME (1 << 4) -#define CPU_FEATURE_CX8 (1 << 5) -#define CPU_FEATURE_3DNOW (1 << 6) +#define CPU_FEATURE_RDTSC (1 << 0) +#define CPU_FEATURE_MSR (1 << 1) +#define CPU_FEATURE_MMX (1 << 2) +#define CPU_FEATURE_CR4 (1 << 3) +#define CPU_FEATURE_VME (1 << 4) +#define CPU_FEATURE_CX8 (1 << 5) +#define CPU_FEATURE_3DNOW (1 << 6) #define CPU_FEATURE_SYSCALL (1 << 7) +#define CPU_FEATURE_3DNOWE (1 << 8) extern uint32_t cpu_features; diff --git a/src/cpu/x86_ops.h b/src/cpu/x86_ops.h index bb540ea22..a1ab95125 100644 --- a/src/cpu/x86_ops.h +++ b/src/cpu/x86_ops.h @@ -148,6 +148,7 @@ extern const OpFn dynarec_ops_fpu_686_df_a32[256]; extern const OpFn dynarec_ops_REPE[1024]; extern const OpFn dynarec_ops_REPNE[1024]; extern const OpFn dynarec_ops_3DNOW[256]; +extern const OpFn dynarec_ops_3DNOWE[256]; #else void x86_setopcodes(const OpFn *opcodes, const OpFn *opcodes_0f); #endif @@ -250,6 +251,7 @@ extern const OpFn ops_fpu_686_df_a32[256]; extern const OpFn ops_REPE[1024]; extern const OpFn ops_REPNE[1024]; extern const OpFn ops_3DNOW[256]; +extern const OpFn ops_3DNOWE[256]; #define C0 (1<<8) #define C1 (1<<9) diff --git a/src/cpu/x86_ops_3dnow.h b/src/cpu/x86_ops_3dnow.h index 8131967ca..d24a6cc33 100644 --- a/src/cpu/x86_ops_3dnow.h +++ b/src/cpu/x86_ops_3dnow.h @@ -58,6 +58,17 @@ static int opPF2ID(uint32_t fetchdat) return 0; } +static int opPF2IW(uint32_t fetchdat) +{ + MMX_REG src; + + MMX_GETSRC(); + + cpu_state.MM[cpu_reg].sw[0] = (int32_t)src.f[0]; + cpu_state.MM[cpu_reg].sw[1] = (int32_t)src.f[1]; + + return 0; +} static int opPFACC(uint32_t fetchdat) { MMX_REG src; @@ -71,6 +82,47 @@ static int opPFACC(uint32_t fetchdat) return 0; } +static int opPFNACC(uint32_t fetchdat) +{ + MMX_REG src; + float tempf; + + MMX_GETSRC(); + + tempf = cpu_state.MM[cpu_reg].f[0] - cpu_state.MM[cpu_reg].f[1]; + cpu_state.MM[cpu_reg].f[1] = src.f[0] - src.f[1]; + cpu_state.MM[cpu_reg].f[0] = tempf; + + return 0; +} +static int opPFPNACC(uint32_t fetchdat) +{ + MMX_REG src; + float tempf; + + MMX_GETSRC(); + + tempf = cpu_state.MM[cpu_reg].f[0] - cpu_state.MM[cpu_reg].f[1]; + cpu_state.MM[cpu_reg].f[1] = src.f[0] + src.f[1]; + cpu_state.MM[cpu_reg].f[0] = tempf; + + return 0; +} +static int opPSWAPD(uint32_t fetchdat) +{ + MMX_REG src; + float tempf, tempf2; + + MMX_GETSRC(); + + /* We have to do this in case source and destination overlap. */ + tempf = src.f[0]; + tempf2 = src.f[1]; + cpu_state.MM[cpu_reg].f[1] = tempf; + cpu_state.MM[cpu_reg].f[0] = tempf2; + + return 0; +} static int opPFADD(uint32_t fetchdat) { MMX_REG src; @@ -268,6 +320,17 @@ static int opPI2FD(uint32_t fetchdat) return 0; } +static int opPI2FW(uint32_t fetchdat) +{ + MMX_REG src; + + MMX_GETSRC(); + + cpu_state.MM[cpu_reg].f[0] = (float)src.sw[0]; + cpu_state.MM[cpu_reg].f[1] = (float)src.sw[1]; + + return 0; +} static int opPMULHRW(uint32_t fetchdat) { if (cpu_mod == 3) @@ -318,6 +381,30 @@ const OpFn OP_TABLE(3DNOW)[256] = /*f0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, }; +const OpFn OP_TABLE(3DNOWE)[256] = +{ +/* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f*/ +/*00*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, opPI2FW, opPI2FD, ILLEGAL, ILLEGAL, +/*10*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, opPF2IW, opPF2ID, ILLEGAL, ILLEGAL, +/*20*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*30*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + +/*40*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*50*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*60*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*70*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, + +/*80*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, opPFNACC, ILLEGAL, ILLEGAL, ILLEGAL, opPFPNACC, ILLEGAL, +/*90*/ opPFCMPGE, ILLEGAL, ILLEGAL, ILLEGAL, opPFMIN, ILLEGAL, opPFRCP, opPFRSQRT, ILLEGAL, ILLEGAL, opPFSUB, ILLEGAL, ILLEGAL, ILLEGAL, opPFADD, ILLEGAL, +/*a0*/ opPFCMPGT, ILLEGAL, ILLEGAL, ILLEGAL, opPFMAX, ILLEGAL, opPFRCPIT1, opPFRSQIT1, ILLEGAL, ILLEGAL, opPFSUBR, ILLEGAL, ILLEGAL, ILLEGAL, opPFACC, ILLEGAL, +/*b0*/ opPFCMPEQ, ILLEGAL, ILLEGAL, ILLEGAL, opPFMUL, ILLEGAL, opPFRCPIT2, opPMULHRW, ILLEGAL, ILLEGAL, ILLEGAL, opPSWAPD, ILLEGAL, ILLEGAL, ILLEGAL, opPAVGUSB, + +/*c0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*d0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*e0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +/*f0*/ ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, ILLEGAL, +}; + static int op3DNOW_a16(uint32_t fetchdat) { uint8_t opcode;