about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-03-17 17:46:30 +0800
committer GitHub <noreply@github.com> 2025-03-17 10:46:30 +0100
commit 18888e404e4d91abfa597c5358570c3976c704e5 (patch)
tree 576af19aad335ccf9a76e5251c6abf65caf9976f /src
parent 394513971cf97619f34de6f84a5792d8ecd8f9c7 (diff)
download box64-18888e404e4d91abfa597c5358570c3976c704e5.tar.gz
box64-18888e404e4d91abfa597c5358570c3976c704e5.zip
[RV64_DYNAREC] Minor D8..DF opcodes refactor (#2442)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_d8.c 435
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_d9.c 541
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_da.c 367
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_db.c 479
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_dc.c 346
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_dd.c 305
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_de.c 282
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_df.c 500
8 files changed, 1552 insertions, 1703 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_d8.c b/src/dynarec/rv64/dynarec_rv64_d8.c
index 94503ea9..6dd32420 100644
--- a/src/dynarec/rv64/dynarec_rv64_d8.c
+++ b/src/dynarec/rv64/dynarec_rv64_d8.c
@@ -43,221 +43,224 @@ uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v2);
     MAYUSE(v1);
 
-    switch (nextop) {
-        case 0xC0 ... 0xC7:
-            INST_NAME("FADD ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FADDS(v1, v1, v2);
-            } else {
-                FADDD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xC8 ... 0xCF:
-            INST_NAME("FMUL ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FMULS(v1, v1, v2);
-            } else {
-                FMULD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xD0 ... 0xD7:
-            INST_NAME("FCOM ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            break;
-        case 0xD8 ... 0xDF:
-            INST_NAME("FCOMP ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE0 ... 0xE7:
-            INST_NAME("FSUB ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v1, v2);
-            } else {
-                FSUBD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xE8 ... 0xEF:
-            INST_NAME("FSUBR ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v2, v1);
-            } else {
-                FSUBD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xF0 ... 0xF7:
-            INST_NAME("FDIV ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v1, v2);
-            } else {
-                FDIVD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xF8 ... 0xFF:
-            INST_NAME("FDIVR ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v2, v1);
-            } else {
-                FDIVD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FADD ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FADDS(v1, v1, s0);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FADDD(v1, v1, s0);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 1:
-                    INST_NAME("FMUL ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FMULS(v1, v1, s0);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FMULD(v1, v1, s0);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 2:
-                    INST_NAME("FCOM ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (ST_IS_F(0)) {
-                        FCOMS(v1, s0, x1, x6, x3, x4, x5);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FCOMD(v1, s0, x1, x6, x3, x4, x5);
-                    }
-                    break;
-                case 3:
-                    INST_NAME("FCOMP ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (ST_IS_F(0)) {
-                        FCOMS(v1, s0, x1, x6, x3, x4, x5);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FCOMD(v1, s0, x1, x6, x3, x4, x5);
-                    }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FSUB ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FSUBS(v1, v1, s0);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FSUBD(v1, v1, s0);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 5:
-                    INST_NAME("FSUBR ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FSUBS(v1, s0, v1);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FSUBD(v1, s0, v1);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 6:
-                    INST_NAME("FDIV ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FDIVS(v1, v1, s0);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FDIVD(v1, v1, s0);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 7:
-                    INST_NAME("FDIVR ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(s0, ed, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                    if (ST_IS_F(0)) {
-                        FDIVS(v1, s0, v1);
-                    } else {
-                        FCVTDS(s0, s0);
-                        FDIVD(v1, s0, v1);
-                    }
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-            }
-    }
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FADD ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FADDS(v1, v1, v2);
+                } else {
+                    FADDD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xC8 ... 0xCF:
+                INST_NAME("FMUL ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FMULS(v1, v1, v2);
+                } else {
+                    FMULD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xD0 ... 0xD7:
+                INST_NAME("FCOM ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                break;
+            case 0xD8 ... 0xDF:
+                INST_NAME("FCOMP ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE0 ... 0xE7:
+                INST_NAME("FSUB ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v1, v2);
+                } else {
+                    FSUBD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xE8 ... 0xEF:
+                INST_NAME("FSUBR ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v2, v1);
+                } else {
+                    FSUBD(v1, v2, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xF0 ... 0xF7:
+                INST_NAME("FDIV ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v1, v2);
+                } else {
+                    FDIVD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xF8 ... 0xFF:
+                INST_NAME("FDIVR ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v2, v1);
+                } else {
+                    FDIVD(v1, v2, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FADD ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FADDS(v1, v1, s0);
+                } else {
+                    FCVTDS(s0, s0);
+                    FADDD(v1, v1, s0);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 1:
+                INST_NAME("FMUL ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FMULS(v1, v1, s0);
+                } else {
+                    FCVTDS(s0, s0);
+                    FMULD(v1, v1, s0);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 2:
+                INST_NAME("FCOM ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, s0, x1, x6, x3, x4, x5);
+                } else {
+                    FCVTDS(s0, s0);
+                    FCOMD(v1, s0, x1, x6, x3, x4, x5);
+                }
+                break;
+            case 3:
+                INST_NAME("FCOMP ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, s0, x1, x6, x3, x4, x5);
+                } else {
+                    FCVTDS(s0, s0);
+                    FCOMD(v1, s0, x1, x6, x3, x4, x5);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FSUB ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v1, s0);
+                } else {
+                    FCVTDS(s0, s0);
+                    FSUBD(v1, v1, s0);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 5:
+                INST_NAME("FSUBR ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, s0, v1);
+                } else {
+                    FCVTDS(s0, s0);
+                    FSUBD(v1, s0, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 6:
+                INST_NAME("FDIV ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v1, s0);
+                } else {
+                    FCVTDS(s0, s0);
+                    FDIVD(v1, v1, s0);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 7:
+                INST_NAME("FDIVR ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(s0, ed, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, s0, v1);
+                } else {
+                    FCVTDS(s0, s0);
+                    FDIVD(v1, s0, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 52c8ec10..f337a020 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -46,220 +46,201 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v2);
     MAYUSE(j64);
 
-    switch (nextop) {
-        case 0xC0:
-        case 0xC1:
-        case 0xC2:
-        case 0xC3:
-        case 0xC4:
-        case 0xC5:
-        case 0xC6:
-        case 0xC7:
-            INST_NAME("FLD STx");
-            X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, X87_ST(nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, (nextop & 7) + 1, X87_COMBINE(0, (nextop & 7) + 1));
-            if (ST_IS_F(0)) {
-                FMVS(v2, v1);
-            } else {
-                FMVD(v2, v1);
-            }
-            break;
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FLD STx");
+                X87_PUSH_OR_FAIL(v2, dyn, ninst, x1, X87_ST(nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, (nextop & 7) + 1, X87_COMBINE(0, (nextop & 7) + 1));
+                if (ST_IS_F(0)) {
+                    FMVS(v2, v1);
+                } else {
+                    FMVD(v2, v1);
+                }
+                break;
 
-        case 0xC8:
-            INST_NAME("FXCH ST0");
-            break;
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-            INST_NAME("FXCH STx");
-            // swap the cache value, not the double value itself :p
-            x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
-            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
-            // should set C1 to 0
-            break;
+            case 0xC8:
+                INST_NAME("FXCH ST0");
+                break;
+            case 0xC9 ... 0xCF:
+                INST_NAME("FXCH STx");
+                // swap the cache value, not the double value itself :p
+                x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
+                x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
+                // should set C1 to 0
+                break;
 
-        case 0xD0:
-            INST_NAME("FNOP");
-            break;
+            case 0xD0:
+                INST_NAME("FNOP");
+                break;
 
-        case 0xD8:
-            INST_NAME("FSTPNCE ST0, ST0");
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xD9:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-            INST_NAME("FSTPNCE ST0, STx");
-            // copy the cache value for st0 to stx
-            x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
-            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE0:
-            INST_NAME("FCHS");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            if (ST_IS_F(0)) {
-                FNEGS(v1, v1);
-            } else {
-                FNEGD(v1, v1);
-            }
-            break;
-        case 0xE1:
-            INST_NAME("FABS");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            if (ST_IS_F(0)) {
-                FABSS(v1, v1);
-            } else {
-                FABSD(v1, v1);
-            }
-            break;
+            case 0xD8:
+                INST_NAME("FSTPNCE ST0, ST0");
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xD9 ... 0xDF:
+                INST_NAME("FSTPNCE ST0, STx");
+                // copy the cache value for st0 to stx
+                x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
+                x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE0:
+                INST_NAME("FCHS");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                if (ST_IS_F(0)) {
+                    FNEGS(v1, v1);
+                } else {
+                    FNEGD(v1, v1);
+                }
+                break;
+            case 0xE1:
+                INST_NAME("FABS");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                if (ST_IS_F(0)) {
+                    FABSS(v1, v1);
+                } else {
+                    FABSD(v1, v1);
+                }
+                break;
 
-        case 0xE4:
-            INST_NAME("FTST");
-            DEFAULT
-            break;
-        case 0xE5:
-            INST_NAME("FXAM");
+            case 0xE4:
+                INST_NAME("FTST");
+                DEFAULT;
+                break;
+            case 0xE5:
+                INST_NAME("FXAM");
 #if 1
-            i1 = x87_get_current_cache(dyn, ninst, 0, EXT_CACHE_ST_D);
-            // value put in x14
-            if (i1 == -1) {
-                if (fpu_is_st_freed(dyn, ninst, 0)) {
-                    MOV32w(x4, 0b100000100000000);
-                    B_MARK3_nocond;
-                } else {
-                    // not in cache, so check Empty status and load it
-                    i2 = -dyn->e.x87stack;
-                    LWU(x3, xEmu, offsetof(x64emu_t, fpu_stack));
-                    if (i2) {
-                        ADDI(x3, x3, i2);
-                    }
-                    MOV32w(x4, 0b100000100000000); // empty: C3,C2,C0 = 101
-                    BGE_MARK3(xZR, x3);
-                    // x5 will be the actual top
-                    LWU(x5, xEmu, offsetof(x64emu_t, top));
-                    if (i2) {
-                        ADDI(x5, x5, i2);
-                        ANDI(x5, x5, 7); // (emu->top + i)&7
-                    }
-                    // load x2 with ST0 anyway, for sign extraction
-                    if (rv64_zba)
-                        SH3ADD(x1, x5, xEmu);
-                    else {
-                        SLLI(x5, x5, 3);
-                        ADD(x1, xEmu, x5);
-                    }
-                    LD(x2, x1, offsetof(x64emu_t, x87));
-                    // load tag
-                    if (i2 >= 0) {
-                        LHU(x3, xEmu, offsetof(x64emu_t, fpu_tags));
-                        if (i2 > 0) {
-                            LUI(x5, 0xffff0);
-                            OR(x3, x3, x5);
-                            SRLI(x3, x3, i2 * 2);
+                i1 = x87_get_current_cache(dyn, ninst, 0, EXT_CACHE_ST_D);
+                // value put in x14
+                if (i1 == -1) {
+                    if (fpu_is_st_freed(dyn, ninst, 0)) {
+                        MOV32w(x4, 0b100000100000000);
+                        B_MARK3_nocond;
+                    } else {
+                        // not in cache, so check Empty status and load it
+                        i2 = -dyn->e.x87stack;
+                        LWU(x3, xEmu, offsetof(x64emu_t, fpu_stack));
+                        if (i2) {
+                            ADDI(x3, x3, i2);
+                        }
+                        MOV32w(x4, 0b100000100000000); // empty: C3,C2,C0 = 101
+                        BGE_MARK3(xZR, x3);
+                        // x5 will be the actual top
+                        LWU(x5, xEmu, offsetof(x64emu_t, top));
+                        if (i2) {
+                            ADDI(x5, x5, i2);
+                            ANDI(x5, x5, 7); // (emu->top + i)&7
+                        }
+                        // load x2 with ST0 anyway, for sign extraction
+                        if (rv64_zba)
+                            SH3ADD(x1, x5, xEmu);
+                        else {
+                            SLLI(x5, x5, 3);
+                            ADD(x1, xEmu, x5);
+                        }
+                        LD(x2, x1, offsetof(x64emu_t, x87));
+                        // load tag
+                        if (i2 >= 0) {
+                            LHU(x3, xEmu, offsetof(x64emu_t, fpu_tags));
+                            if (i2 > 0) {
+                                LUI(x5, 0xffff0);
+                                OR(x3, x3, x5);
+                                SRLI(x3, x3, i2 * 2);
+                            }
+                            ANDI(x3, x3, 0b11);
+                            BNEZ_MARK3(x3); // empty: C3,C2,C0 = 101
                         }
-                        ANDI(x3, x3, 0b11);
-                        BNEZ_MARK3(x3); // empty: C3,C2,C0 = 101
                     }
+                } else {
+                    // simply move from cache reg to x2
+                    v1 = dyn->e.x87reg[i1];
+                    FMVXD(x2, v1);
                 }
-            } else {
-                // simply move from cache reg to x2
-                v1 = dyn->e.x87reg[i1];
-                FMVXD(x2, v1);
-            }
-            // get exponant in x1
-            SRLI(x1, x2, 20 + 32);
-            ANDI(x1, x1, 0x7ff); // 0x7ff
-            BNEZ_MARK(x1);       // not zero or denormal
-            MOV64x(x3, 0x7fffffffffffffff);
-            AND(x1, x2, x3);
-            MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100
-            BEQZ_MARK3(x1);
-            MOV32w(x4, 0b100010000000000); // Denormal: C3,C2,C0 = 110
-            B_MARK3_nocond;
-            MARK;
-            ADDI(x3, xZR, 0x7ff);          // infinite/NaN?
-            MOV32w(x4, 0b000010000000000); // normal: C3,C2,C0 = 010
-            BNE_MARK3(x1, x3);
-            SLLI(x3, x2, 12);
-            SRLI(x3, x3, 12);              // and 0x000fffffffffffff
-            MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011
-            BEQZ_MARK3(x3);
-            MOV32w(x4, 0b000000100000000); // NaN: C3,C2,C0 = 001
-            MARK3;
-            // Extract signa & Update SW
-            SRLI(x1, x2, 63);
-            SLLI(x1, x1, 9);
-            OR(x4, x4, x1); // C1
-            LHU(x1, xEmu, offsetof(x64emu_t, sw));
-            MOV32w(x2, ~0b0100011100000000);
-            AND(x1, x1, x2);
-            OR(x4, x4, x1);
-            SH(x4, xEmu, offsetof(x64emu_t, sw));
+                // get exponant in x1
+                SRLI(x1, x2, 20 + 32);
+                ANDI(x1, x1, 0x7ff); // 0x7ff
+                BNEZ_MARK(x1);       // not zero or denormal
+                MOV64x(x3, 0x7fffffffffffffff);
+                AND(x1, x2, x3);
+                MOV32w(x4, 0b100000000000000); // Zero: C3,C2,C0 = 100
+                BEQZ_MARK3(x1);
+                MOV32w(x4, 0b100010000000000); // Denormal: C3,C2,C0 = 110
+                B_MARK3_nocond;
+                MARK;
+                ADDI(x3, xZR, 0x7ff);          // infinite/NaN?
+                MOV32w(x4, 0b000010000000000); // normal: C3,C2,C0 = 010
+                BNE_MARK3(x1, x3);
+                SLLI(x3, x2, 12);
+                SRLI(x3, x3, 12);              // and 0x000fffffffffffff
+                MOV32w(x4, 0b000010100000000); // infinity: C3,C2,C0 = 011
+                BEQZ_MARK3(x3);
+                MOV32w(x4, 0b000000100000000); // NaN: C3,C2,C0 = 001
+                MARK3;
+                // Extract signa & Update SW
+                SRLI(x1, x2, 63);
+                SLLI(x1, x1, 9);
+                OR(x4, x4, x1); // C1
+                LHU(x1, xEmu, offsetof(x64emu_t, sw));
+                MOV32w(x2, ~0b0100011100000000);
+                AND(x1, x1, x2);
+                OR(x4, x4, x1);
+                SH(x4, xEmu, offsetof(x64emu_t, sw));
 #else
-            MESSAGE(LOG_DUMP, "Need Optimization\n");
-            x87_refresh(dyn, ninst, x1, x2, 0);
-            s0 = x87_stackcount(dyn, ninst, x1);
-            CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it?
-            x87_unstackcount(dyn, ninst, x1, s0);
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                x87_refresh(dyn, ninst, x1, x2, 0);
+                s0 = x87_stackcount(dyn, ninst, x1);
+                CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it?
+                x87_unstackcount(dyn, ninst, x1, s0);
 #endif
-            break;
+                break;
 
-        case 0xE8:
-            INST_NAME("FLD1");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
-            if (ST_IS_F(0)) {
-                MOV32w(x1, 0x3f800000);
-                FMVWX(v1, x1);
-            } else {
-                MOV64x(x1, 0x3FF0000000000000);
-                FMVDX(v1, x1);
-            }
-            break;
-        case 0xE9:
-            INST_NAME("FLDL2T");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-            FTABLE64(v1, L2T);
-            break;
-        case 0xEA:
-            INST_NAME("FLDL2E");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-            FTABLE64(v1, L2E);
-            break;
-        case 0xEB:
-            INST_NAME("FLDPI");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-            FTABLE64(v1, PI);
-            break;
-        case 0xEC:
-            INST_NAME("FLDLG2");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-            FTABLE64(v1, LG2);
-            break;
-        case 0xED:
-            INST_NAME("FLDLN2");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-            FTABLE64(v1, LN2);
-            break;
-        case 0xEE:
-            INST_NAME("FLDZ");
-            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
-            if (ST_IS_F(0)) {
-                FMVWX(v1, xZR);
-            } else {
-                FMVDX(v1, xZR);
-            }
-            break;
+            case 0xE8:
+                INST_NAME("FLD1");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
+                if (ST_IS_F(0)) {
+                    MOV32w(x1, 0x3f800000);
+                    FMVWX(v1, x1);
+                } else {
+                    MOV64x(x1, 0x3FF0000000000000);
+                    FMVDX(v1, x1);
+                }
+                break;
+            case 0xE9:
+                INST_NAME("FLDL2T");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                FTABLE64(v1, L2T);
+                break;
+            case 0xEA:
+                INST_NAME("FLDL2E");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                FTABLE64(v1, L2E);
+                break;
+            case 0xEB:
+                INST_NAME("FLDPI");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                FTABLE64(v1, PI);
+                break;
+            case 0xEC:
+                INST_NAME("FLDLG2");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                FTABLE64(v1, LG2);
+                break;
+            case 0xED:
+                INST_NAME("FLDLN2");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                FTABLE64(v1, LN2);
+                break;
+            case 0xEE:
+                INST_NAME("FLDZ");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
+                if (ST_IS_F(0))
+                    FMVWX(v1, xZR);
+                else
+                    FMVDX(v1, xZR);
+                break;
 
         case 0xF0:
             INST_NAME("F2XM1");
@@ -455,90 +436,78 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
-
-
-        case 0xD1:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-        case 0xE2:
-        case 0xE3:
-        case 0xE6:
-        case 0xE7:
-        case 0xEF:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FLD ST0, float[ED]");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLW(v1, ed, fixedaddress);
-                    if (!ST_IS_F(0)) {
-                        FCVTDS(v1, v1);
-                    }
-                    break;
-                case 2:
-                    INST_NAME("FST float[ED], ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
-                    if (ST_IS_F(0))
-                        s0 = v1;
-                    else {
-                        s0 = fpu_get_scratch(dyn);
-                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                        FCVTSD(s0, v1);
-                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    }
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FSW(s0, ed, fixedaddress);
-                    break;
-                case 3:
-                    INST_NAME("FSTP float[ED], ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    if (!ST_IS_F(0)) {
-                        if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
-                        FCVTSD(v1, v1);
-                        if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    }
-                    FSW(v1, ed, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FLDENV Ed");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    MOV32w(x2, 0);
-                    CALL(fpu_loadenv, -1, ed, x2);
-                    break;
-                case 5:
-                    INST_NAME("FLDCW Ew");
-                    GETEW(x1, 0);
-                    SH(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8
-                    break;
-                case 6:
-                    INST_NAME("FNSTENV Ed");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    MOV32w(x2, 0);
-                    CALL(fpu_savenv, -1, ed, x2);
-                    break;
-                case 7:
-                    INST_NAME("FNSTCW Ew");
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 0, 0);
-                    ed = x1;
-                    wb1 = 1;
-                    LH(x1, xEmu, offsetof(x64emu_t, cw));
-                    EWBACK;
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FLD ST0, float[ED]");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, BOX64ENV(dynarec_x87double) ? EXT_CACHE_ST_D : EXT_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLW(v1, ed, fixedaddress);
+                if (!ST_IS_F(0)) {
+                    FCVTDS(v1, v1);
+                }
+                break;
+            case 2:
+                INST_NAME("FST float[ED], ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                if (ST_IS_F(0))
+                    s0 = v1;
+                else {
+                    s0 = fpu_get_scratch(dyn);
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                    FCVTSD(s0, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                }
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FSW(s0, ed, fixedaddress);
+                break;
+            case 3:
+                INST_NAME("FSTP float[ED], ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                if (!ST_IS_F(0)) {
+                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x2);
+                    FCVTSD(v1, v1);
+                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                }
+                FSW(v1, ed, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FLDENV Ed");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                MOV32w(x2, 0);
+                CALL(fpu_loadenv, -1, ed, x2);
+                break;
+            case 5:
+                INST_NAME("FLDCW Ew");
+                GETEW(x1, 0);
+                SH(x1, xEmu, offsetof(x64emu_t, cw)); // hopefully cw is not too far for an imm8
+                break;
+            case 6:
+                INST_NAME("FNSTENV Ed");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                MOV32w(x2, 0);
+                CALL(fpu_savenv, -1, ed, x2);
+                break;
+            case 7:
+                INST_NAME("FNSTCW Ew");
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x1, &fixedaddress, rex, NULL, 0, 0);
+                ed = x1;
+                wb1 = 1;
+                LH(x1, xEmu, offsetof(x64emu_t, cw));
+                EWBACK;
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_da.c b/src/dynarec/rv64/dynarec_rv64_da.c
index 7609d877..165ae3c8 100644
--- a/src/dynarec/rv64/dynarec_rv64_da.c
+++ b/src/dynarec/rv64/dynarec_rv64_da.c
@@ -43,196 +43,187 @@ uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(ed);
     MAYUSE(j64);
 
-    switch (nextop) {
-        case 0xC0:
-        case 0xC1:
-        case 0xC2:
-        case 0xC3:
-        case 0xC4:
-        case 0xC5:
-        case 0xC6:
-        case 0xC7:
-            INST_NAME("FCMOVB ST0, STx");
-            READFLAGS(X_CF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, 1 << F_CF);
-            CBZ_NEXT(x1);
-            if (ST_IS_F(0))
-                FMVS(v1, v2);
-            else
-                FMVD(v1, v2);
-            break;
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-            INST_NAME("FCMOVE ST0, STx");
-            READFLAGS(X_ZF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, 1 << F_ZF);
-            CBZ_NEXT(x1);
-            if (ST_IS_F(0))
-                FMVS(v1, v2);
-            else
-                FMVD(v1, v2);
-            break;
-        case 0xD0:
-        case 0xD1:
-        case 0xD2:
-        case 0xD3:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-            INST_NAME("FCMOVBE ST0, STx");
-            READFLAGS(X_CF | X_ZF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF));
-            CBZ_NEXT(x1);
-            if (ST_IS_F(0))
-                FMVS(v1, v2);
-            else
-                FMVD(v1, v2);
-            break;
-        case 0xD8:
-        case 0xD9:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-            INST_NAME("FCMOVU ST0, STx");
-            READFLAGS(X_PF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, (1 << F_PF));
-            CBZ_NEXT(x1);
-            if (ST_IS_F(0))
-                FMVS(v1, v2);
-            else
-                FMVD(v1, v2);
-            break;
-        case 0xE9:
-            INST_NAME("FUCOMPP ST0, ST1");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, 1, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE4:
-        case 0xF0:
-        case 0xF1:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-        case 0xF8:
-        case 0xF9:
-        case 0xFD:
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0:
+            case 0xC1:
+            case 0xC2:
+            case 0xC3:
+            case 0xC4:
+            case 0xC5:
+            case 0xC6:
+            case 0xC7:
+                INST_NAME("FCMOVB ST0, STx");
+                READFLAGS(X_CF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, 1 << F_CF);
+                CBZ_NEXT(x1);
+                if (ST_IS_F(0))
+                    FMVS(v1, v2);
+                else
+                    FMVD(v1, v2);
+                break;
+            case 0xC8:
+            case 0xC9:
+            case 0xCA:
+            case 0xCB:
+            case 0xCC:
+            case 0xCD:
+            case 0xCE:
+            case 0xCF:
+                INST_NAME("FCMOVE ST0, STx");
+                READFLAGS(X_ZF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, 1 << F_ZF);
+                CBZ_NEXT(x1);
+                if (ST_IS_F(0))
+                    FMVS(v1, v2);
+                else
+                    FMVD(v1, v2);
+                break;
+            case 0xD0:
+            case 0xD1:
+            case 0xD2:
+            case 0xD3:
+            case 0xD4:
+            case 0xD5:
+            case 0xD6:
+            case 0xD7:
+                INST_NAME("FCMOVBE ST0, STx");
+                READFLAGS(X_CF | X_ZF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF));
+                CBZ_NEXT(x1);
+                if (ST_IS_F(0))
+                    FMVS(v1, v2);
+                else
+                    FMVD(v1, v2);
+                break;
+            case 0xD8:
+            case 0xD9:
+            case 0xDA:
+            case 0xDB:
+            case 0xDC:
+            case 0xDD:
+            case 0xDE:
+            case 0xDF:
+                INST_NAME("FCMOVU ST0, STx");
+                READFLAGS(X_PF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, (1 << F_PF));
+                CBZ_NEXT(x1);
+                if (ST_IS_F(0))
+                    FMVS(v1, v2);
+                else
+                    FMVD(v1, v2);
+                break;
+            case 0xE9:
+                INST_NAME("FUCOMPP ST0, ST1");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, 1, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FIADD ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FADDD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 1:
-                    INST_NAME("FIMUL ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FMULD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 2:
-                    INST_NAME("FICOM ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
-                    break;
-                case 3:
-                    INST_NAME("FICOMP ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FISUB ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FSUBD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 5:
-                    INST_NAME("FISUBR ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FSUBD(v1, v2, v1);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 6:
-                    INST_NAME("FIDIV ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FDIVD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 7:
-                    INST_NAME("FIDIVR ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v2, x1, RD_RNE); // i32 -> double
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FDIVD(v1, v2, v1);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-            }
     }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FIADD ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FADDD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 1:
+                INST_NAME("FIMUL ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FMULD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 2:
+                INST_NAME("FICOM ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                break;
+            case 3:
+                INST_NAME("FICOMP ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FISUB ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FSUBD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 5:
+                INST_NAME("FISUBR ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FSUBD(v1, v2, v1);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 6:
+                INST_NAME("FIDIV ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FDIVD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 7:
+                INST_NAME("FIDIVR ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v2, x1, RD_RNE); // i32 -> double
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FDIVD(v1, v2, v1);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c
index 37aafc65..45fcd1e0 100644
--- a/src/dynarec/rv64/dynarec_rv64_db.c
+++ b/src/dynarec/rv64/dynarec_rv64_db.c
@@ -44,254 +44,249 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v1);
     MAYUSE(j64);
 
-    switch (nextop) {
-        case 0xC0:
-        case 0xC1:
-        case 0xC2:
-        case 0xC3:
-        case 0xC4:
-        case 0xC5:
-        case 0xC6:
-        case 0xC7:
-            INST_NAME("FCMOVNB ST0, STx");
-            READFLAGS(X_CF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, 1 << F_CF);
-            CBNZ_NEXT(x1);
-            if (ST_IS_F(0)) {
-                FMVS(v1, v2);
-            } else {
-                FMVD(v1, v2); // F_CF==0
-            }
-            break;
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-            INST_NAME("FCMOVNE ST0, STx");
-            READFLAGS(X_ZF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, 1 << F_ZF);
-            CBNZ_NEXT(x1);
-            if (ST_IS_F(0)) {
-                FMVS(v1, v2);
-            } else {
-                FMVD(v1, v2); // F_ZF==0
-            }
-            break;
-        case 0xD0:
-        case 0xD1:
-        case 0xD2:
-        case 0xD3:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-            INST_NAME("FCMOVNBE ST0, STx");
-            READFLAGS(X_CF | X_ZF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF));
-            CBNZ_NEXT(x1);
-            if (ST_IS_F(0)) {
-                FMVS(v1, v2);
-            } else {
-                FMVD(v1, v2); // F_CF==0 & F_ZF==0
-            }
-            break;
-        case 0xD8:
-        case 0xD9:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-            INST_NAME("FCMOVNU ST0, STx");
-            READFLAGS(X_PF);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            ANDI(x1, xFlags, 1 << F_PF);
-            CBNZ_NEXT(x1);
-            if (ST_IS_F(0)) {
-                FMVS(v1, v2);
-            } else {
-                FMVD(v1, v2); // F_PF==0
-            }
-            break;
-        case 0xE1:
-            INST_NAME("FDISI8087_NOP"); // so.. NOP?
-            break;
-        case 0xE2:
-            INST_NAME("FNCLEX");
-            LH(x2, xEmu, offsetof(x64emu_t, sw));
-            ANDI(x2, x2, ~(0xff));  // IE .. PE, SF, ES
-            MOV32w(x1, ~(1 << 15)); // B
-            AND(x2, x2, x1);
-            SH(x2, xEmu, offsetof(x64emu_t, sw));
-            break;
-        case 0xE3:
-            INST_NAME("FNINIT");
-            MESSAGE(LOG_DUMP, "Need Optimization\n");
-            x87_purgecache(dyn, ninst, 0, x1, x2, x3);
-            CALL(reset_fpu, -1, 0, 0);
-            break;
-        case 0xE8:
-        case 0xE9:
-        case 0xEA:
-        case 0xEB:
-        case 0xEC:
-        case 0xED:
-        case 0xEE:
-        case 0xEF:
-            INST_NAME("FUCOMI ST0, STx");
-            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMIS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMID(v1, v2, x1, x2, x3, x4, x5);
-            }
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0:
+            case 0xC1:
+            case 0xC2:
+            case 0xC3:
+            case 0xC4:
+            case 0xC5:
+            case 0xC6:
+            case 0xC7:
+                INST_NAME("FCMOVNB ST0, STx");
+                READFLAGS(X_CF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, 1 << F_CF);
+                CBNZ_NEXT(x1);
+                if (ST_IS_F(0)) {
+                    FMVS(v1, v2);
+                } else {
+                    FMVD(v1, v2); // F_CF==0
+                }
+                break;
+            case 0xC8:
+            case 0xC9:
+            case 0xCA:
+            case 0xCB:
+            case 0xCC:
+            case 0xCD:
+            case 0xCE:
+            case 0xCF:
+                INST_NAME("FCMOVNE ST0, STx");
+                READFLAGS(X_ZF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, 1 << F_ZF);
+                CBNZ_NEXT(x1);
+                if (ST_IS_F(0)) {
+                    FMVS(v1, v2);
+                } else {
+                    FMVD(v1, v2); // F_ZF==0
+                }
+                break;
+            case 0xD0:
+            case 0xD1:
+            case 0xD2:
+            case 0xD3:
+            case 0xD4:
+            case 0xD5:
+            case 0xD6:
+            case 0xD7:
+                INST_NAME("FCMOVNBE ST0, STx");
+                READFLAGS(X_CF | X_ZF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, (1 << F_CF) | (1 << F_ZF));
+                CBNZ_NEXT(x1);
+                if (ST_IS_F(0)) {
+                    FMVS(v1, v2);
+                } else {
+                    FMVD(v1, v2); // F_CF==0 & F_ZF==0
+                }
+                break;
+            case 0xD8:
+            case 0xD9:
+            case 0xDA:
+            case 0xDB:
+            case 0xDC:
+            case 0xDD:
+            case 0xDE:
+            case 0xDF:
+                INST_NAME("FCMOVNU ST0, STx");
+                READFLAGS(X_PF);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                ANDI(x1, xFlags, 1 << F_PF);
+                CBNZ_NEXT(x1);
+                if (ST_IS_F(0)) {
+                    FMVS(v1, v2);
+                } else {
+                    FMVD(v1, v2); // F_PF==0
+                }
+                break;
+            case 0xE1:
+                INST_NAME("FDISI8087_NOP"); // so.. NOP?
+                break;
+            case 0xE2:
+                INST_NAME("FNCLEX");
+                LH(x2, xEmu, offsetof(x64emu_t, sw));
+                ANDI(x2, x2, ~(0xff));  // IE .. PE, SF, ES
+                MOV32w(x1, ~(1 << 15)); // B
+                AND(x2, x2, x1);
+                SH(x2, xEmu, offsetof(x64emu_t, sw));
+                break;
+            case 0xE3:
+                INST_NAME("FNINIT");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                x87_purgecache(dyn, ninst, 0, x1, x2, x3);
+                CALL(reset_fpu, -1, 0, 0);
+                break;
+            case 0xE8:
+            case 0xE9:
+            case 0xEA:
+            case 0xEB:
+            case 0xEC:
+            case 0xED:
+            case 0xEE:
+            case 0xEF:
+                INST_NAME("FUCOMI ST0, STx");
+                SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMIS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMID(v1, v2, x1, x2, x3, x4, x5);
+                }
 
-            break;
-        case 0xF0:
-        case 0xF1:
-        case 0xF2:
-        case 0xF3:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-            INST_NAME("FCOMI ST0, STx");
-            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMIS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMID(v1, v2, x1, x2, x3, x4, x5);
-            }
-            break;
-
-        case 0xE0:
-        case 0xE4:
-        case 0xE5:
-        case 0xE6:
-        case 0xE7:
-            DEFAULT;
-            break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FILD ST0, Ed");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(x1, ed, fixedaddress);
-                    FCVTDW(v1, x1, RD_RNE); // i32 -> double
-                    break;
-                case 1:
-                    INST_NAME("FISTTP Ed, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FSFLAGSI(0); // reset all bits
-                    }
-                    FCVTWD(x4, v1, RD_RTZ);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FRFLAGS(x5); // get back FPSR to check the IOC bit
-                        ANDI(x5, x5, 1 << FR_NV);
-                        BEQZ_MARK(x5);
-                        MOV32w(x4, 0x80000000);
-                        MARK;
-                    }
-                    SW(x4, wback, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FIST Ed, ST0");
-                    DEFAULT;
-                    break;
-                case 3:
-                    INST_NAME("FISTP Ed, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    u8 = x87_setround(dyn, ninst, x1, x2);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
-                    v2 = fpu_get_scratch(dyn);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FSFLAGSI(0); // reset all bits
-                    }
-                    FCVTWD(x4, v1, RD_DYN);
-                    x87_restoreround(dyn, ninst, u8);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FRFLAGS(x5); // get back FPSR to check the IOC bit
-                        ANDI(x5, x5, 1 << FR_NV);
-                        BEQ_MARK2(x5, xZR);
-                        MOV32w(x4, 0x80000000);
-                    }
-                    MARK2;
-                    SW(x4, wback, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 5:
-                    INST_NAME("FLD tbyte");
+                break;
+            case 0xF0:
+            case 0xF1:
+            case 0xF2:
+            case 0xF3:
+            case 0xF4:
+            case 0xF5:
+            case 0xF6:
+            case 0xF7:
+                INST_NAME("FCOMI ST0, STx");
+                SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMIS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMID(v1, v2, x1, x2, x3, x4, x5);
+                }
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FILD ST0, Ed");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LW(x1, ed, fixedaddress);
+                FCVTDW(v1, x1, RD_RNE); // i32 -> double
+                break;
+            case 1:
+                INST_NAME("FISTTP Ed, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                FCVTWD(x4, v1, RD_RTZ);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, 1 << FR_NV);
+                    BEQZ_MARK(x5);
+                    MOV32w(x4, 0x80000000);
+                    MARK;
+                }
+                SW(x4, wback, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FIST Ed, ST0");
+                DEFAULT;
+                break;
+            case 3:
+                INST_NAME("FISTP Ed, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                u8 = x87_setround(dyn, ninst, x1, x2);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                v2 = fpu_get_scratch(dyn);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                FCVTWD(x4, v1, RD_DYN);
+                x87_restoreround(dyn, ninst, u8);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, 1 << FR_NV);
+                    BEQ_MARK2(x5, xZR);
+                    MOV32w(x4, 0x80000000);
+                }
+                MARK2;
+                SW(x4, wback, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 5:
+                INST_NAME("FLD tbyte");
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
+                if ((PK(0) == 0xDB && ((PK(1) >> 3) & 7) == 7) || (!rex.is32bits && PK(0) >= 0x40 && PK(0) <= 0x4f && PK(1) == 0xDB && ((PK(2) >> 3) & 7) == 7)) {
+                    // the FLD is immediately followed by an FSTP
+                    LD(x5, ed, fixedaddress + 0);
+                    LH(x6, ed, fixedaddress + 8);
+                    // no persistent scratch register, so unroll both instructions here...
+                    MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
+                    nextop = F8; // 0xDB or rex
+                    if (!rex.is32bits && nextop >= 0x40 && nextop <= 0x4f) {
+                        rex.rex = nextop;
+                        nextop = F8; // 0xDB
+                    } else
+                        rex.rex = 0;
+                    nextop = F8; // modrm
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
-                    if ((PK(0) == 0xDB && ((PK(1) >> 3) & 7) == 7) || (!rex.is32bits && PK(0) >= 0x40 && PK(0) <= 0x4f && PK(1) == 0xDB && ((PK(2) >> 3) & 7) == 7)) {
-                        // the FLD is immediatly followed by an FSTP
-                        LD(x5, ed, fixedaddress + 0);
-                        LH(x6, ed, fixedaddress + 8);
-                        // no persistant scratch register, so unrool both instruction here...
-                        MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
-                        nextop = F8; // 0xDB or rex
-                        if (!rex.is32bits && nextop >= 0x40 && nextop <= 0x4f) {
-                            rex.rex = nextop;
-                            nextop = F8; // 0xDB
-                        } else
-                            rex.rex = 0;
-                        nextop = F8; // modrm
-                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
-                        SD(x5, ed, fixedaddress + 0);
-                        SH(x6, ed, fixedaddress + 8);
-                    } else {
-                        if (BOX64ENV(x87_no80bits)) {
-                            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-                            FLD(v1, ed, fixedaddress);
-                        } else {
-                            ADDI(x1, ed, fixedaddress);
-                            X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3);
-                            // sync top
-                            s0 = x87_stackcount(dyn, ninst, x3);
-                            CALL(native_fld, -1, x1, 0);
-                            // go back with the top & stack counter
-                            x87_unstackcount(dyn, ninst, x3, s0);
-                        }
-                    }
-                    break;
-                case 7:
-                    INST_NAME("FSTP tbyte");
+                    SD(x5, ed, fixedaddress + 0);
+                    SH(x6, ed, fixedaddress + 8);
+                } else {
                     if (BOX64ENV(x87_no80bits)) {
-                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                        FSD(v1, wback, fixedaddress);
+                        X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                        FLD(v1, ed, fixedaddress);
                     } else {
-                        x87_forget(dyn, ninst, x1, x3, 0);
-                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        ADDI(x1, ed, fixedaddress);
+                        X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3);
+                        // sync top
                         s0 = x87_stackcount(dyn, ninst, x3);
-                        CALL(native_fstp, -1, ed, 0);
+                        CALL(native_fld, -1, x1, 0);
+                        // go back with the top & stack counter
                         x87_unstackcount(dyn, ninst, x3, s0);
                     }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+                }
+                break;
+            case 7:
+                INST_NAME("FSTP tbyte");
+                if (BOX64ENV(x87_no80bits)) {
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                    FSD(v1, wback, fixedaddress);
+                } else {
+                    x87_forget(dyn, ninst, x1, x3, 0);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                    s0 = x87_stackcount(dyn, ninst, x3);
+                    CALL(native_fstp, -1, ed, 0);
+                    x87_unstackcount(dyn, ninst, x3, s0);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_dc.c b/src/dynarec/rv64/dynarec_rv64_dc.c
index 309abde8..3346e6de 100644
--- a/src/dynarec/rv64/dynarec_rv64_dc.c
+++ b/src/dynarec/rv64/dynarec_rv64_dc.c
@@ -39,180 +39,184 @@ uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v2);
     MAYUSE(v1);
 
-    switch (nextop) {
-        case 0xC0 ... 0xC7:
-            INST_NAME("FADD STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FADDS(v1, v1, v2);
-            } else {
-                FADDD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xC8 ... 0xCF:
-            INST_NAME("FMUL STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FMULS(v1, v1, v2);
-            } else {
-                FMULD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xD0 ... 0xD7:
-            INST_NAME("FCOM ST0, STx"); // yep
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            break;
-        case 0xD8 ... 0xDF:
-            INST_NAME("FCOMP ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE0 ... 0xE7:
-            INST_NAME("FSUBR STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v2, v1);
-            } else {
-                FSUBD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xE8 ... 0xEF:
-            INST_NAME("FSUB STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v1, v2);
-            } else {
-                FSUBD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xF0 ... 0xF7:
-            INST_NAME("FDIVR STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v2, v1);
-            } else {
-                FDIVD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        case 0xF8 ... 0xFF:
-            INST_NAME("FDIV STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v1, v2);
-            } else {
-                FDIVD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            break;
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FADD ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FADD STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FADDS(v1, v1, v2);
+                } else {
                     FADDD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 1:
-                    INST_NAME("FMUL ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xC8 ... 0xCF:
+                INST_NAME("FMUL STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FMULS(v1, v1, v2);
+                } else {
                     FMULD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 2:
-                    INST_NAME("FCOM ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    FCOMD(v1, v2, x1, x6, x3, x4, x5);
-                    break;
-                case 3:
-                    INST_NAME("FCOMP ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    FCOMD(v1, v2, x1, x6, x3, x4, x5);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FSUB ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FSUBD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 5:
-                    INST_NAME("FSUBR ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xD0 ... 0xD7:
+                INST_NAME("FCOM ST0, STx"); // yep
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                break;
+            case 0xD8 ... 0xDF:
+                INST_NAME("FCOMP ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE0 ... 0xE7:
+                INST_NAME("FSUBR STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v2, v1);
+                } else {
                     FSUBD(v1, v2, v1);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 6:
-                    INST_NAME("FDIV ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-                    FDIVD(v1, v1, v2);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 7:
-                    INST_NAME("FDIVR ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v2, wback, fixedaddress);
-                    if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xE8 ... 0xEF:
+                INST_NAME("FSUB STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v1, v2);
+                } else {
+                    FSUBD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xF0 ... 0xF7:
+                INST_NAME("FDIVR STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v2, v1);
+                } else {
                     FDIVD(v1, v2, v1);
-                    if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-                    break;
-            }
-    }
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 0xF8 ... 0xFF:
+                INST_NAME("FDIV STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v1, v2);
+                } else {
+                    FDIVD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FADD ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FADDD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 1:
+                INST_NAME("FMUL ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FMULD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 2:
+                INST_NAME("FCOM ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                FCOMD(v1, v2, x1, x6, x3, x4, x5);
+                break;
+            case 3:
+                INST_NAME("FCOMP ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                FCOMD(v1, v2, x1, x6, x3, x4, x5);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FSUB ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FSUBD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 5:
+                INST_NAME("FSUBR ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FSUBD(v1, v2, v1);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 6:
+                INST_NAME("FDIV ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FDIVD(v1, v1, v2);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+            case 7:
+                INST_NAME("FDIVR ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v2, wback, fixedaddress);
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                FDIVD(v1, v2, v1);
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                break;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c
index 06e2ea98..4a1e527d 100644
--- a/src/dynarec/rv64/dynarec_rv64_dd.c
+++ b/src/dynarec/rv64/dynarec_rv64_dd.c
@@ -42,188 +42,131 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v1);
     MAYUSE(j64);
 
-    switch (nextop) {
-        case 0xC0:
-        case 0xC1:
-        case 0xC2:
-        case 0xC3:
-        case 0xC4:
-        case 0xC5:
-        case 0xC6:
-        case 0xC7:
-            INST_NAME("FFREE STx");
-            MESSAGE(LOG_DUMP, "Need Optimization\n");
-            x87_purgecache(dyn, ninst, 0, x1, x2, x3);
-            MOV32w(x1, nextop & 7);
-            CALL(fpu_do_free, -1, x1, 0);
-            break;
-        case 0xD0:
-        case 0xD1:
-        case 0xD2:
-        case 0xD3:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-            INST_NAME("FST ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FMVS(v2, v1);
-            } else {
-                FMVD(v2, v1);
-            }
-            break;
-        case 0xD8:
-            INST_NAME("FSTP ST0, ST0");
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xD9:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-            INST_NAME("FSTP ST0, STx");
-            // copy the cache value for st0 to stx
-            x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
-            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-            x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE0:
-        case 0xE1:
-        case 0xE2:
-        case 0xE3:
-        case 0xE4:
-        case 0xE5:
-        case 0xE6:
-        case 0xE7:
-            INST_NAME("FUCOM ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            break;
-        case 0xE8:
-        case 0xE9:
-        case 0xEA:
-        case 0xEB:
-        case 0xEC:
-        case 0xED:
-        case 0xEE:
-        case 0xEF:
-            INST_NAME("FUCOMP ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-        case 0xF0:
-        case 0xF1:
-        case 0xF2:
-        case 0xF3:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-        case 0xF8:
-        case 0xF9:
-        case 0xFA:
-        case 0xFB:
-        case 0xFC:
-        case 0xFD:
-        case 0xFE:
-        case 0xFF:
-            DEFAULT;
-            break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FLD double");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FLD(v1, wback, fixedaddress);
-                    break;
-                case 1:
-                    INST_NAME("FISTTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
-                    if (ST_IS_I64(0)) {
-                        FSD(v1, wback, fixedaddress);
-                    } else {
-                        if (!BOX64ENV(dynarec_fastround)) {
-                            FSFLAGSI(0); // reset all bits
-                        }
-                        FCVTLD(x4, v1, RD_RTZ);
-                        if (!BOX64ENV(dynarec_fastround)) {
-                            FRFLAGS(x5); // get back FPSR to check the IOC bit
-                            ANDI(x5, x5, 1 << FR_NV);
-                            BEQZ_MARK(x5);
-                            MOV64x(x4, 0x8000000000000000);
-                            MARK;
-                        }
-                        SD(x4, wback, fixedaddress);
-                    }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FST double");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FFREE STx");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                x87_purgecache(dyn, ninst, 0, x1, x2, x3);
+                MOV32w(x1, nextop & 7);
+                CALL(fpu_do_free, -1, x1, 0);
+                break;
+            case 0xD0 ... 0xD7:
+                INST_NAME("FST ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FMVS(v2, v1);
+                } else {
+                    FMVD(v2, v1);
+                }
+                break;
+            case 0xD8:
+                INST_NAME("FSTP ST0, ST0");
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xD9 ... 0xDF:
+                INST_NAME("FSTP ST0, STx");
+                // copy the cache value for st0 to stx
+                x87_get_st_empty(dyn, ninst, x1, x2, nextop & 7, X87_ST(nextop & 7));
+                x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                x87_swapreg(dyn, ninst, x1, x2, 0, nextop & 7);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE0 ... 0xE7:
+                INST_NAME("FUCOM ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                break;
+            case 0xE8 ... 0xEF:
+                INST_NAME("FUCOMP ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FLD double");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLD(v1, wback, fixedaddress);
+                break;
+            case 1:
+                INST_NAME("FISTTP i64, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
+                if (ST_IS_I64(0)) {
                     FSD(v1, wback, fixedaddress);
-                    break;
-                case 3:
-                    INST_NAME("FSTP double");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    FSD(v1, wback, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 6:
-                    INST_NAME("FSAVE m108byte");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0);
-                    CALL(native_fsave, -1, ed, 0);
-                    break;
-                case 7:
-                    INST_NAME("FNSTSW m2byte");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0);
-                    LWU(x2, xEmu, offsetof(x64emu_t, top));
-                    LHU(x3, xEmu, offsetof(x64emu_t, sw));
-                    if (dyn->e.x87stack) {
-                        // update top
-                        ADDI(x2, x2, -dyn->e.x87stack);
-                        ANDI(x2, x2, 7);
+                } else {
+                    if (!BOX64ENV(dynarec_fastround)) {
+                        FSFLAGSI(0); // reset all bits
+                    }
+                    FCVTLD(x4, v1, RD_RTZ);
+                    if (!BOX64ENV(dynarec_fastround)) {
+                        FRFLAGS(x5); // get back FPSR to check the IOC bit
+                        ANDI(x5, x5, 1 << FR_NV);
+                        BEQZ_MARK(x5);
+                        MOV64x(x4, 0x8000000000000000);
+                        MARK;
                     }
-                    MOV32w(x5, ~0x3800);
-                    AND(x3, x3, x5);          // mask out TOP
-                    SLLI(x2, x2, 11);         // shift TOP to bit 11
-                    OR(x3, x3, x2);           // inject TOP
-                    SH(x3, ed, fixedaddress); // store whole sw flags
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+                    SD(x4, wback, fixedaddress);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FST double");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FSD(v1, wback, fixedaddress);
+                break;
+            case 3:
+                INST_NAME("FSTP double");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FSD(v1, wback, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 6:
+                INST_NAME("FSAVE m108byte");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0);
+                CALL(native_fsave, -1, ed, 0);
+                break;
+            case 7:
+                INST_NAME("FNSTSW m2byte");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0);
+                LWU(x2, xEmu, offsetof(x64emu_t, top));
+                LHU(x3, xEmu, offsetof(x64emu_t, sw));
+                if (dyn->e.x87stack) {
+                    // update top
+                    ADDI(x2, x2, -dyn->e.x87stack);
+                    ANDI(x2, x2, 7);
+                }
+                MOV32w(x5, ~0x3800);
+                AND(x3, x3, x5);          // mask out TOP
+                SLLI(x2, x2, 11);         // shift TOP to bit 11
+                OR(x3, x3, x2);           // inject TOP
+                SH(x3, ed, fixedaddress); // store whole sw flags
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_de.c b/src/dynarec/rv64/dynarec_rv64_de.c
index 91a7b0c5..d0952a29 100644
--- a/src/dynarec/rv64/dynarec_rv64_de.c
+++ b/src/dynarec/rv64/dynarec_rv64_de.c
@@ -38,173 +38,119 @@ uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v2);
     MAYUSE(v1);
 
-    switch (nextop) {
-        case 0xC0:
-        case 0xC1:
-        case 0xC2:
-        case 0xC3:
-        case 0xC4:
-        case 0xC5:
-        case 0xC6:
-        case 0xC7:
-            INST_NAME("FADDP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FADDS(v1, v1, v2);
-            } else {
-                FADDD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-            INST_NAME("FMULP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FMULS(v1, v1, v2);
-            } else {
-                FMULD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xD0:
-        case 0xD1:
-        case 0xD2:
-        case 0xD3:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-            INST_NAME("FCOMP ST0, STx"); // yep
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xD9:
-            INST_NAME("FCOMPP ST0, STx");
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (ST_IS_F(0)) {
-                FCOMS(v1, v2, x1, x2, x3, x4, x5);
-            } else {
-                FCOMD(v1, v2, x1, x2, x3, x4, x5);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE0:
-        case 0xE1:
-        case 0xE2:
-        case 0xE3:
-        case 0xE4:
-        case 0xE5:
-        case 0xE6:
-        case 0xE7:
-            INST_NAME("FSUBRP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v2, v1);
-            } else {
-                FSUBD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xE8:
-        case 0xE9:
-        case 0xEA:
-        case 0xEB:
-        case 0xEC:
-        case 0xED:
-        case 0xEE:
-        case 0xEF:
-            INST_NAME("FSUBP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FSUBS(v1, v1, v2);
-            } else {
-                FSUBD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xF0:
-        case 0xF1:
-        case 0xF2:
-        case 0xF3:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-            INST_NAME("FDIVRP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v2, v1);
-            } else {
-                FDIVD(v1, v2, v1);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xF8:
-        case 0xF9:
-        case 0xFA:
-        case 0xFB:
-        case 0xFC:
-        case 0xFD:
-        case 0xFE:
-        case 0xFF:
-            INST_NAME("FDIVP STx, ST0");
-            v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
-            if (ST_IS_F(0)) {
-                FDIVS(v1, v1, v2);
-            } else {
-                FDIVD(v1, v1, v2);
-            }
-            if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xD8:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-            DEFAULT;
-            break;
-        default:
-            switch ((nextop >> 3) & 7) {
-                default:
-                    DEFAULT;
-            }
-    }
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FADDP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FADDS(v1, v1, v2);
+                } else {
+                    FADDD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xC8 ... 0xCF:
+                INST_NAME("FMULP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FMULS(v1, v1, v2);
+                } else {
+                    FMULD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xD0 ... 0xD7:
+                INST_NAME("FCOMP ST0, STx"); // yep
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xD9:
+                INST_NAME("FCOMPP ST0, STx");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (ST_IS_F(0)) {
+                    FCOMS(v1, v2, x1, x2, x3, x4, x5);
+                } else {
+                    FCOMD(v1, v2, x1, x2, x3, x4, x5);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE0 ... 0xE7:
+                INST_NAME("FSUBRP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v2, v1);
+                } else {
+                    FSUBD(v1, v2, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xE8 ... 0xEF:
+                INST_NAME("FSUBP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FSUBS(v1, v1, v2);
+                } else {
+                    FSUBD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xF0 ... 0xF7:
+                INST_NAME("FDIVRP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v2, v1);
+                } else {
+                    FDIVD(v1, v2, v1);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 0xF8 ... 0xFF:
+                INST_NAME("FDIVP STx, ST0");
+                v2 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v1 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                if (!BOX64ENV(dynarec_fastround)) u8 = x87_setround(dyn, ninst, x1, x5);
+                if (ST_IS_F(0)) {
+                    FDIVS(v1, v1, v2);
+                } else {
+                    FDIVD(v1, v1, v2);
+                }
+                if (!BOX64ENV(dynarec_fastround)) x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index cbb75923..7d689d9b 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -40,275 +40,273 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(v1);
     MAYUSE(j64);
 
-    switch (nextop) {
-        case 0xC0 ... 0xC7:
-            INST_NAME("FFREEP STx");
-            // not handling Tag...
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
+    if (MODREG)
+        switch (nextop) {
+            case 0xC0 ... 0xC7:
+                INST_NAME("FFREEP STx");
+                // not handling Tag...
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
 
-        case 0xE0:
-            INST_NAME("FNSTSW AX");
-            LWU(x2, xEmu, offsetof(x64emu_t, top));
-            if (dyn->e.x87stack) {
-                ADDI(x2, x2, -dyn->e.x87stack);
-                ANDI(x2, x2, 0x7);
-            }
-            LHU(x1, xEmu, offsetof(x64emu_t, sw));
-            MOV32w(x3, 0b1100011111111111); // mask
-            AND(x1, x1, x3);
-            SLLI(x2, x2, 11);
-            OR(x1, x1, x2); // inject top
-            SH(x1, xEmu, offsetof(x64emu_t, sw));
-            SRLI(xRAX, xRAX, 16);
-            SLLI(xRAX, xRAX, 16);
-            OR(xRAX, xRAX, x1);
-            break;
-        case 0xE8 ... 0xF7:
-            if (nextop < 0xF0) {
-                INST_NAME("FUCOMIP ST0, STx");
-            } else {
-                INST_NAME("FCOMIP ST0, STx");
-            }
-            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
-            SET_DFNONE();
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
-            v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
-            CLEAR_FLAGS();
-            IFX (X_ZF | X_PF | X_CF) {
-                if (ST_IS_F(0)) {
-                    FEQS(x5, v1, v1);
-                    FEQS(x4, v2, v2);
-                    AND(x5, x5, x4);
-                    BEQZ(x5, 24); // undefined/NaN
-                    FEQS(x5, v1, v2);
-                    BNEZ(x5, 24);           // equal
-                    FLTS(x3, v1, v2);       // x3 = (v1<v2)?1:0
-                    OR(xFlags, xFlags, x3); // CF is the least significant bit
-                    J(16);                  // end
-                    // NaN
-                    ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
-                    J(8); // end
-                    // equal
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                    // end
+            case 0xE0:
+                INST_NAME("FNSTSW AX");
+                LWU(x2, xEmu, offsetof(x64emu_t, top));
+                if (dyn->e.x87stack) {
+                    ADDI(x2, x2, -dyn->e.x87stack);
+                    ANDI(x2, x2, 0x7);
+                }
+                LHU(x1, xEmu, offsetof(x64emu_t, sw));
+                MOV32w(x3, 0b1100011111111111); // mask
+                AND(x1, x1, x3);
+                SLLI(x2, x2, 11);
+                OR(x1, x1, x2); // inject top
+                SH(x1, xEmu, offsetof(x64emu_t, sw));
+                SRLI(xRAX, xRAX, 16);
+                SLLI(xRAX, xRAX, 16);
+                OR(xRAX, xRAX, x1);
+                break;
+            case 0xE8 ... 0xF7:
+                if (nextop < 0xF0) {
+                    INST_NAME("FUCOMIP ST0, STx");
                 } else {
-                    FEQD(x5, v1, v1);
-                    FEQD(x4, v2, v2);
-                    AND(x5, x5, x4);
-                    BEQZ(x5, 24); // undefined/NaN
-                    FEQD(x5, v1, v2);
-                    BNEZ(x5, 24);           // equal
-                    FLTD(x3, v1, v2);       // x3 = (v1<v2)?1:0
-                    OR(xFlags, xFlags, x3); // CF is the least significant bit
-                    J(16);                  // end
-                    // NaN
-                    ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
-                    J(8); // end
-                    // equal
-                    ORI(xFlags, xFlags, 1 << F_ZF);
-                    // end
+                    INST_NAME("FCOMIP ST0, STx");
                 }
-            }
-            X87_POP_OR_FAIL(dyn, ninst, x3);
-            break;
-        case 0xC8 ... 0xDF:
-        case 0xE1 ... 0xE7:
-        case 0xF8 ... 0xFF:
-            DEFAULT;
-            break;
-
-        default:
-            switch ((nextop >> 3) & 7) {
-                case 0:
-                    INST_NAME("FILD ST0, Ew");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
-                    LH(x1, wback, fixedaddress);
+                SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                SET_DFNONE();
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_COMBINE(0, nextop & 7));
+                v2 = x87_get_st(dyn, ninst, x1, x2, nextop & 7, X87_COMBINE(0, nextop & 7));
+                CLEAR_FLAGS();
+                IFX (X_ZF | X_PF | X_CF) {
                     if (ST_IS_F(0)) {
-                        FCVTSL(v1, x1, RD_RNE);
+                        FEQS(x5, v1, v1);
+                        FEQS(x4, v2, v2);
+                        AND(x5, x5, x4);
+                        BEQZ(x5, 24); // undefined/NaN
+                        FEQS(x5, v1, v2);
+                        BNEZ(x5, 24);           // equal
+                        FLTS(x3, v1, v2);       // x3 = (v1<v2)?1:0
+                        OR(xFlags, xFlags, x3); // CF is the least significant bit
+                        J(16);                  // end
+                        // NaN
+                        ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                        J(8); // end
+                        // equal
+                        ORI(xFlags, xFlags, 1 << F_ZF);
+                        // end
                     } else {
-                        FCVTDL(v1, x1, RD_RNE);
-                    }
-                    break;
-                case 1:
-                    INST_NAME("FISTTP Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FSFLAGSI(0); // reset all bits
+                        FEQD(x5, v1, v1);
+                        FEQD(x4, v2, v2);
+                        AND(x5, x5, x4);
+                        BEQZ(x5, 24); // undefined/NaN
+                        FEQD(x5, v1, v2);
+                        BNEZ(x5, 24);           // equal
+                        FLTD(x3, v1, v2);       // x3 = (v1<v2)?1:0
+                        OR(xFlags, xFlags, x3); // CF is the least significant bit
+                        J(16);                  // end
+                        // NaN
+                        ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                        J(8); // end
+                        // equal
+                        ORI(xFlags, xFlags, 1 << F_ZF);
+                        // end
                     }
-                    if (ST_IS_F(0)) {
-                        FCVTWS(x4, v1, RD_RTZ);
-                    } else {
-                        FCVTWD(x4, v1, RD_RTZ);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
+    else
+        switch ((nextop >> 3) & 7) {
+            case 0:
+                INST_NAME("FILD ST0, Ew");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
+                LH(x1, wback, fixedaddress);
+                if (ST_IS_F(0)) {
+                    FCVTSL(v1, x1, RD_RNE);
+                } else {
+                    FCVTDL(v1, x1, RD_RNE);
+                }
+                break;
+            case 1:
+                INST_NAME("FISTTP Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x4, &fixedaddress, rex, NULL, 1, 0);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                if (ST_IS_F(0)) {
+                    FCVTWS(x4, v1, RD_RTZ);
+                } else {
+                    FCVTWD(x4, v1, RD_RTZ);
+                }
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, 1 << FR_NV);
+                    BNEZ_MARK(x5);
+                    SLLIW(x5, x4, 16);
+                    SRAIW(x5, x5, 16);
+                    BEQ_MARK2(x5, x4);
+                    MARK;
+                    MOV32w(x4, 0x8000);
+                }
+                MARK2;
+                SH(x4, wback, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FIST Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                u8 = x87_setround(dyn, ninst, x1, x2);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                if (ST_IS_F(0)) {
+                    FCVTWS(x4, v1, RD_DYN);
+                } else {
+                    FCVTWD(x4, v1, RD_DYN);
+                }
+                x87_restoreround(dyn, ninst, u8);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, 1 << FR_NV);
+                    BNEZ_MARK(x5);
+                    SLLIW(x5, x4, 16);
+                    SRAIW(x5, x5, 16);
+                    BEQ_MARK2(x5, x4);
+                    MARK;
+                    MOV32w(x4, 0x8000);
+                }
+                MARK2;
+                SH(x4, wback, fixedaddress);
+                break;
+            case 3:
+                INST_NAME("FISTP Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                u8 = x87_setround(dyn, ninst, x1, x2);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FSFLAGSI(0); // reset all bits
+                }
+                if (ST_IS_F(0)) {
+                    FCVTWS(x4, v1, RD_DYN);
+                } else {
+                    FCVTWD(x4, v1, RD_DYN);
+                }
+                x87_restoreround(dyn, ninst, u8);
+                if (!BOX64ENV(dynarec_fastround)) {
+                    FRFLAGS(x5); // get back FPSR to check the IOC bit
+                    ANDI(x5, x5, 1 << FR_NV);
+                    BNEZ_MARK(x5);
+                    SLLIW(x5, x4, 16);
+                    SRAIW(x5, x5, 16);
+                    BEQ_MARK2(x5, x4);
+                    MARK;
+                    MOV32w(x4, 0x8000);
+                }
+                MARK2;
+                SH(x4, wback, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FBLD ST0, tbytes");
+                X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                s0 = x87_stackcount(dyn, ninst, x3);
+                CALL(fpu_fbld, -1, ed, 0);
+                x87_unstackcount(dyn, ninst, x3, s0);
+                break;
+            case 5:
+                INST_NAME("FILD ST0, i64");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+
+                if (ST_IS_I64(0)) {
+                    FLD(v1, wback, fixedaddress);
+                } else {
+                    LD(x1, wback, fixedaddress);
+                    if (rex.is32bits) {
+                        // need to also feed the STll stuff...
+                        ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
+                        LWU(x5, xEmu, offsetof(x64emu_t, top));
+                        int a = 0 - dyn->e.x87stack;
+                        if (a) {
+                            ADDIW(x5, x5, a);
+                            ANDI(x5, x5, 0x7);
+                        }
+                        SLLI(x5, x5, 4); // fpu_ll is 2 i64
+                        ADD(x5, x5, x4);
+                        SD(x1, x5, 8); // ll
                     }
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FRFLAGS(x5); // get back FPSR to check the IOC bit
-                        ANDI(x5, x5, 1 << FR_NV);
-                        BNEZ_MARK(x5);
-                        SLLIW(x5, x4, 16);
-                        SRAIW(x5, x5, 16);
-                        BEQ_MARK2(x5, x4);
-                        MARK;
-                        MOV32w(x4, 0x8000);
+                    FCVTDL(v1, x1, RD_DYN);
+                    if (rex.is32bits) {
+                        FSD(v1, x5, 0); // ref
                     }
-                    MARK2;
-                    SH(x4, wback, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FIST Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
+                }
+                break;
+            case 6:
+                INST_NAME("FBSTP tbytes, ST0");
+                x87_forget(dyn, ninst, x1, x2, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                s0 = x87_stackcount(dyn, ninst, x3);
+                CALL(fpu_fbst, -1, ed, 0);
+                x87_unstackcount(dyn, ninst, x3, s0);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 7:
+                INST_NAME("FISTP i64, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
+                if (!ST_IS_I64(0)) {
                     u8 = x87_setround(dyn, ninst, x1, x2);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FSFLAGSI(0); // reset all bits
-                    }
-                    if (ST_IS_F(0)) {
-                        FCVTWS(x4, v1, RD_DYN);
-                    } else {
-                        FCVTWD(x4, v1, RD_DYN);
-                    }
-                    x87_restoreround(dyn, ninst, u8);
-                    if (!BOX64ENV(dynarec_fastround)) {
-                        FRFLAGS(x5); // get back FPSR to check the IOC bit
-                        ANDI(x5, x5, 1 << FR_NV);
-                        BNEZ_MARK(x5);
-                        SLLIW(x5, x4, 16);
-                        SRAIW(x5, x5, 16);
-                        BEQ_MARK2(x5, x4);
+                }
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+
+                if (ST_IS_I64(0)) {
+                    FSD(v1, wback, fixedaddress);
+                } else {
+                    if (rex.is32bits) {
+                        // need to check STll first...
+                        ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
+                        LWU(x5, xEmu, offsetof(x64emu_t, top));
+                        int a = 0 - dyn->e.x87stack;
+                        if (a) {
+                            ADDIW(x5, x5, a);
+                            ANDI(x5, x5, 0x7);
+                        }
+                        SLLI(x5, x5, 4); // fpu_ll is 2 i64
+                        ADD(x5, x5, x4);
+                        FMVXD(x3, v1);
+                        LD(x6, x5, 0); // ref
+                        BNE_MARK(x6, x3);
+                        LD(x6, x5, 8); // ll
+                        SD(x6, wback, fixedaddress);
+                        B_MARK3_nocond;
                         MARK;
-                        MOV32w(x4, 0x8000);
                     }
-                    MARK2;
-                    SH(x4, wback, fixedaddress);
-                    break;
-                case 3:
-                    INST_NAME("FISTP Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_F);
-                    u8 = x87_setround(dyn, ninst, x1, x2);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+
                     if (!BOX64ENV(dynarec_fastround)) {
                         FSFLAGSI(0); // reset all bits
                     }
-                    if (ST_IS_F(0)) {
-                        FCVTWS(x4, v1, RD_DYN);
-                    } else {
-                        FCVTWD(x4, v1, RD_DYN);
-                    }
-                    x87_restoreround(dyn, ninst, u8);
+                    FCVTLD(x4, v1, RD_DYN);
                     if (!BOX64ENV(dynarec_fastround)) {
                         FRFLAGS(x5); // get back FPSR to check the IOC bit
                         ANDI(x5, x5, 1 << FR_NV);
-                        BNEZ_MARK(x5);
-                        SLLIW(x5, x4, 16);
-                        SRAIW(x5, x5, 16);
-                        BEQ_MARK2(x5, x4);
-                        MARK;
-                        MOV32w(x4, 0x8000);
+                        BEQ_MARK2(x5, xZR);
+                        MOV64x(x4, 0x8000000000000000LL);
                     }
                     MARK2;
-                    SH(x4, wback, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FBLD ST0, tbytes");
-                    X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    s0 = x87_stackcount(dyn, ninst, x3);
-                    CALL(fpu_fbld, -1, ed, 0);
-                    x87_unstackcount(dyn, ninst, x3, s0);
-                    break;
-                case 5:
-                    INST_NAME("FILD ST0, i64");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
-
-                    if (ST_IS_I64(0)) {
-                        FLD(v1, wback, fixedaddress);
-                    } else {
-                        LD(x1, wback, fixedaddress);
-                        if (rex.is32bits) {
-                            // need to also feed the STll stuff...
-                            ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
-                            LWU(x5, xEmu, offsetof(x64emu_t, top));
-                            int a = 0 - dyn->e.x87stack;
-                            if (a) {
-                                ADDIW(x5, x5, a);
-                                ANDI(x5, x5, 0x7);
-                            }
-                            SLLI(x5, x5, 4); // fpu_ll is 2 i64
-                            ADD(x5, x5, x4);
-                            SD(x1, x5, 8); // ll
-                        }
-                        FCVTDL(v1, x1, RD_DYN);
-                        if (rex.is32bits) {
-                            FSD(v1, x5, 0); // ref
-                        }
-                    }
-                    break;
-                case 6:
-                    INST_NAME("FBSTP tbytes, ST0");
-                    x87_forget(dyn, ninst, x1, x2, 0);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    s0 = x87_stackcount(dyn, ninst, x3);
-                    CALL(fpu_fbst, -1, ed, 0);
-                    x87_unstackcount(dyn, ninst, x3, s0);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 7:
-                    INST_NAME("FISTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64);
-                    if (!ST_IS_I64(0)) {
-                        u8 = x87_setround(dyn, ninst, x1, x2);
-                    }
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0);
-
-                    if (ST_IS_I64(0)) {
-                        FSD(v1, wback, fixedaddress);
-                    } else {
-                        if (rex.is32bits) {
-                            // need to check STll first...
-                            ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll));
-                            LWU(x5, xEmu, offsetof(x64emu_t, top));
-                            int a = 0 - dyn->e.x87stack;
-                            if (a) {
-                                ADDIW(x5, x5, a);
-                                ANDI(x5, x5, 0x7);
-                            }
-                            SLLI(x5, x5, 4); // fpu_ll is 2 i64
-                            ADD(x5, x5, x4);
-                            FMVXD(x3, v1);
-                            LD(x6, x5, 0); // ref
-                            BNE_MARK(x6, x3);
-                            LD(x6, x5, 8); // ll
-                            SD(x6, wback, fixedaddress);
-                            B_MARK3_nocond;
-                            MARK;
-                        }
-
-                        if (!BOX64ENV(dynarec_fastround)) {
-                            FSFLAGSI(0); // reset all bits
-                        }
-                        FCVTLD(x4, v1, RD_DYN);
-                        if (!BOX64ENV(dynarec_fastround)) {
-                            FRFLAGS(x5); // get back FPSR to check the IOC bit
-                            ANDI(x5, x5, 1 << FR_NV);
-                            BEQ_MARK2(x5, xZR);
-                            MOV64x(x4, 0x8000000000000000LL);
-                        }
-                        MARK2;
-                        SD(x4, wback, fixedaddress);
-                        MARK3;
-                        x87_restoreround(dyn, ninst, u8);
-                    }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                default:
-                    DEFAULT;
-                    break;
-            }
-    }
+                    SD(x4, wback, fixedaddress);
+                    MARK3;
+                    x87_restoreround(dyn, ninst, u8);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+                break;
+        }
     return addr;
 }