diff --git a/src/cpu/x86_ops_i686.h b/src/cpu/x86_ops_i686.h
index 9e00249ca..f2b07a1c4 100644
--- a/src/cpu/x86_ops_i686.h
+++ b/src/cpu/x86_ops_i686.h
@@ -43,6 +43,132 @@ opSYSEXIT(uint32_t fetchdat)
     return ret;
 }
 
+static int
+sf_fx_save_stor_common(uint32_t fetchdat, int bits)
+{
+    uint8_t   fxinst     = 0;
+    uint32_t  tag_byte;
+    unsigned  index;
+    floatx80  reg;
+
+    if (CPUID < 0x650)
+        return ILLEGAL(fetchdat);
+
+    FP_ENTER();
+
+    if (bits == 32) {
+        fetch_ea_32(fetchdat);
+    } else {
+        fetch_ea_16(fetchdat);
+    }
+
+    if (cpu_state.eaaddr & 0xf) {
+        x386_dynarec_log("Effective address %08X not on 16-byte boundary\n", cpu_state.eaaddr);
+        x86gpf(NULL, 0);
+        return cpu_state.abrt;
+    }
+
+    fxinst = (rmdat >> 3) & 7;
+
+    if ((fxinst > 1) || (cpu_mod == 3)) {
+        x86illegal();
+        return cpu_state.abrt;
+    }
+
+    FP_ENTER();
+
+    if (fxinst == 1) {
+        /* FXRSTOR */
+        fpu_state.cwd = readmemw(easeg, cpu_state.eaaddr);
+        fpu_state.swd = readmemw(easeg, cpu_state.eaaddr + 2);
+        fpu_state.tos = (fpu_state.swd >> 11) & 7;
+
+        /* always set bit 6 as '1 */
+        fpu_state.cwd = (fpu_state.cwd & ~FPU_CW_Reserved_Bits) | 0x0040;
+
+        /* Restore x87 FPU Opcode */
+        /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
+        fpu_state.foo = readmemw(easeg, cpu_state.eaaddr + 6) & 0x7FF;
+
+        fpu_state.fip = readmeml(easeg, cpu_state.eaaddr + 8);
+        fpu_state.fcs = readmemw(easeg, cpu_state.eaaddr + 12);
+
+        tag_byte = readmemb(easeg, cpu_state.eaaddr + 4);
+
+        fpu_state.fdp = readmeml(easeg, cpu_state.eaaddr + 16);
+        fpu_state.fds = readmemw(easeg, cpu_state.eaaddr + 20);
+
+        /* load i387 register file */
+        for (index = 0; index < 8; index++) {
+            reg.fraction = readmemq(easeg, cpu_state.eaaddr + (index * 16) + 32);
+            reg.exp      = readmemw(easeg, cpu_state.eaaddr + (index * 16) + 40);
+
+            // update tag only if it is not empty
+            FPU_save_regi_tag(reg, IS_TAG_EMPTY(index) ? X87_TAG_EMPTY : FPU_tagof(reg), index);
+        }
+
+        fpu_state.tag = unpack_FPU_TW(tag_byte);
+
+        /* check for unmasked exceptions */
+        if (fpu_state.swd & ~fpu_state.cwd & FPU_CW_Exceptions_Mask) {
+            /* set the B and ES bits in the status-word */
+            fpu_state.swd |= (FPU_SW_Summary | FPU_SW_Backward);
+        } else {
+            /* clear the B and ES bits in the status-word */
+            fpu_state.swd &= ~(FPU_SW_Summary | FPU_SW_Backward);
+        }
+
+        CLOCK_CYCLES((cr0 & 1) ? 34 : 44);
+    } else {
+        /* FXSAVE */
+        writememw(easeg, cpu_state.eaaddr, i387_get_control_word());
+        writememw(easeg, cpu_state.eaaddr + 2, i387_get_status_word());
+        writememw(easeg, cpu_state.eaaddr + 4, pack_FPU_TW(fpu_state.tag));
+
+        /* x87 FPU Opcode (16 bits) */
+        /* The lower 11 bits contain the FPU opcode, upper 5 bits are reserved */
+        writememw(easeg, cpu_state.eaaddr + 6, fpu_state.foo);
+
+      /*
+       * x87 FPU IP Offset (32/64 bits)
+       * The contents of this field differ depending on the current
+       * addressing mode (16/32/64 bit) when the FXSAVE instruction was executed:
+       *   + 64-bit mode - 64-bit IP offset
+       *   + 32-bit mode - 32-bit IP offset
+       *   + 16-bit mode - low 16 bits are IP offset; high 16 bits are reserved.
+       * x87 CS FPU IP Selector
+       *   + 16 bit, in 16/32 bit mode only
+       */
+        writememl(easeg, cpu_state.eaaddr + 8, fpu_state.fip);
+        writememl(easeg, cpu_state.eaaddr + 12, fpu_state.fcs);
+
+      /*
+       * x87 FPU Instruction Operand (Data) Pointer Offset (32/64 bits)
+       * The contents of this field differ depending on the current
+       * addressing mode (16/32 bit) when the FXSAVE instruction was executed:
+       *   + 64-bit mode - 64-bit offset
+       *   + 32-bit mode - 32-bit offset
+       *   + 16-bit mode - low 16 bits are offset; high 16 bits are reserved.
+       * x87 DS FPU Instruction Operand (Data) Pointer Selector
+       *   + 16 bit, in 16/32 bit mode only
+       */
+        writememl(easeg, cpu_state.eaaddr + 16, fpu_state.fdp);
+        writememl(easeg, cpu_state.eaaddr + 20, fpu_state.fds);
+
+       /* store i387 register file */
+        for (index = 0; index < 8; index++) {
+            const floatx80 fp = FPU_read_regi(index);
+
+            writememq(easeg, cpu_state.eaaddr + (index * 16) + 32, fp.fraction);
+            writememw(easeg, cpu_state.eaaddr + (index * 16) + 40, fp.exp);
+        }
+
+        CLOCK_CYCLES((cr0 & 1) ? 56 : 67);
+    }
+
+    return cpu_state.abrt;
+}
+
 static int
 fx_save_stor_common(uint32_t fetchdat, int bits)
 {
@@ -253,12 +379,18 @@ fx_save_stor_common(uint32_t fetchdat, int bits)
 static int
 opFXSAVESTOR_a16(uint32_t fetchdat)
 {
+    if (fpu_softfloat) 
+        return sf_fx_save_stor_common(fetchdat, 16);
+
     return fx_save_stor_common(fetchdat, 16);
 }
 
 static int
 opFXSAVESTOR_a32(uint32_t fetchdat)
 {
+    if (fpu_softfloat)
+        return sf_fx_save_stor_common(fetchdat, 32);
+
     return fx_save_stor_common(fetchdat, 32);
 }
 
diff --git a/src/cpu/x87.c b/src/cpu/x87.c
index 98ceb105b..181b7b9ca 100644
--- a/src/cpu/x87.c
+++ b/src/cpu/x87.c
@@ -439,6 +439,79 @@ FPU_tagof(const floatx80 reg)
     return X87_TAG_VALID;
 }
 
+uint8_t
+pack_FPU_TW(uint16_t twd)
+{
+    uint8_t tag_byte = 0;
+
+    if ((twd & 0x0003) != 0x0003) tag_byte |= 0x01;
+    if ((twd & 0x000c) != 0x000c) tag_byte |= 0x02;
+    if ((twd & 0x0030) != 0x0030) tag_byte |= 0x04;
+    if ((twd & 0x00c0) != 0x00c0) tag_byte |= 0x08;
+    if ((twd & 0x0300) != 0x0300) tag_byte |= 0x10;
+    if ((twd & 0x0c00) != 0x0c00) tag_byte |= 0x20;
+    if ((twd & 0x3000) != 0x3000) tag_byte |= 0x40;
+    if ((twd & 0xc000) != 0xc000) tag_byte |= 0x80;
+
+    return tag_byte;
+}
+
+uint16_t
+unpack_FPU_TW(uint16_t tag_byte)
+{
+    uint32_t twd = 0;
+
+  /*                                 FTW
+   *
+   * Note that the original format for FTW can be recreated from the stored
+   * FTW valid bits and the stored 80-bit FP data (assuming the stored data
+   * was not the contents of MMX registers) using the following table:
+
+     | Exponent | Exponent | Fraction | J,M bits | FTW valid | x87 FTW |
+     |  all 1s  |  all 0s  |  all 0s  |          |           |         |
+     -------------------------------------------------------------------
+     |    0     |    0     |    0     |    0x    |     1     | S    10 |
+     |    0     |    0     |    0     |    1x    |     1     | V    00 |
+     -------------------------------------------------------------------
+     |    0     |    0     |    1     |    00    |     1     | S    10 |
+     |    0     |    0     |    1     |    10    |     1     | V    00 |
+     -------------------------------------------------------------------
+     |    0     |    1     |    0     |    0x    |     1     | S    10 |
+     |    0     |    1     |    0     |    1x    |     1     | S    10 |
+     -------------------------------------------------------------------
+     |    0     |    1     |    1     |    00    |     1     | Z    01 |
+     |    0     |    1     |    1     |    10    |     1     | S    10 |
+     -------------------------------------------------------------------
+     |    1     |    0     |    0     |    1x    |     1     | S    10 |
+     |    1     |    0     |    0     |    1x    |     1     | S    10 |
+     -------------------------------------------------------------------
+     |    1     |    0     |    1     |    00    |     1     | S    10 |
+     |    1     |    0     |    1     |    10    |     1     | S    10 |
+     -------------------------------------------------------------------
+     |        all combinations above             |     0     | E    11 |
+
+   *
+   * The J-bit is defined to be the 1-bit binary integer to the left of
+   * the decimal place in the significand.
+   *
+   * The M-bit is defined to be the most significant bit of the fractional
+   * portion of the significand (i.e., the bit immediately to the right of
+   * the decimal place). When the M-bit is the most significant bit of the
+   * fractional portion  of the significand, it must be  0 if the fraction
+   * is all 0's.
+   */
+
+    for (int index = 7; index >= 0; index--, twd <<= 2, tag_byte <<= 1) {
+        if (tag_byte & 0x80) {
+            const floatx80 *fpu_reg = &fpu_state.st_space[index & 7];
+            twd |= FPU_tagof(*fpu_reg);
+        } else {
+            twd |= X87_TAG_EMPTY;
+        }
+    }
+
+    return (twd >> 2);
+}
 
 #ifdef ENABLE_808X_LOG
 void
diff --git a/src/cpu/x87.h b/src/cpu/x87.h
index 5d460bc4b..8fab28ce8 100644
--- a/src/cpu/x87.h
+++ b/src/cpu/x87.h
@@ -141,6 +141,8 @@ void FPU_stack_underflow(uint32_t fetchdat, int stnr, int pop_stack);
 int FPU_handle_NaN32(floatx80 a, float32 b, floatx80 *r, struct float_status_t *status);
 int FPU_handle_NaN64(floatx80 a, float64 b, floatx80 *r, struct float_status_t *status);
 int FPU_tagof(const floatx80 reg);
+uint8_t pack_FPU_TW(uint16_t twd);
+uint16_t unpack_FPU_TW(uint16_t tag_byte);
 
 static __inline uint16_t
 i387_get_control_word(void)