about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-02-04 10:39:25 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-02-04 10:39:25 +0100
commit 165961f27e264164dd62eea0164d16d9d436a8a5 (patch)
tree 7b66a706c0ceeaf14e7c6044138f4df202ef597c /src
parent 81386b7a5600e864427df86ec59a762da23efd12 (diff)
download box64-165961f27e264164dd62eea0164d16d9d436a8a5.tar.gz
box64-165961f27e264164dd62eea0164d16d9d436a8a5.zip
[ARM64_DYNAREC] Small D8..DF opcodes refactor
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d8.c 228
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d9.c 158
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_da.c 186
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_db.c 375
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_dc.c 143
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_dd.c 198
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_de.c 198
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_df.c 481
8 files changed, 938 insertions, 1029 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d8.c b/src/dynarec/arm64/dynarec_arm64_d8.c
index 0fd7c7c4..113519a9 100644
--- a/src/dynarec/arm64/dynarec_arm64_d8.c
+++ b/src/dynarec/arm64/dynarec_arm64_d8.c
@@ -38,6 +38,7 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v2);
     MAYUSE(v1);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -178,119 +179,120 @@ uintptr_t dynarec64_D8(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 FDIVD(v1, v2, v1);
             }
             break;
-
         default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FADD ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FADDS(v1, v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FADDD(v1, v1, s0);
-                    }
-                    break;
-                case 1:
-                    INST_NAME("FMUL ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FMULS(v1, v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FMULD(v1, v1, s0);
-                    }
-                    break;
-                case 2:
-                    INST_NAME("FCOM ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FCMPS(v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FCMPD(v1, s0);
-                    }
-                    FCOM(x1, x2, x3);
-                    break;
-                case 3:
-                    INST_NAME("FCOMP ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FCMPS(v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FCMPD(v1, s0);
-                    }
-                    FCOM(x1, x2, x3);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FSUB ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FSUBS(v1, v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FSUBD(v1, v1, s0);
-                    }
-                    break;
-                case 5:
-                    INST_NAME("FSUBR ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FSUBS(v1, s0, v1);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FSUBD(v1, s0, v1);
-                    }
-                    break;
-                case 6:
-                    INST_NAME("FDIV ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FDIVS(v1, v1, s0);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FDIVD(v1, v1, s0);
-                    }
-                    break;
-                case 7:
-                    INST_NAME("FDIVR ST0, float[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
-                    s0 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(s0, ed, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        FDIVS(v1, s0, v1);
-                    } else {
-                        FCVT_D_S(s0, s0);
-                        FDIVD(v1, s0, v1);
-                    }
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+            DEFAULT;
+            break;
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FADD ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FADDS(v1, v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FADDD(v1, v1, s0);
+                }
+                break;
+            case 1:
+                INST_NAME("FMUL ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FMULS(v1, v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FMULD(v1, v1, s0);
+                }
+                break;
+            case 2:
+                INST_NAME("FCOM ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FCMPS(v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FCMPD(v1, s0);
+                }
+                FCOM(x1, x2, x3);
+                break;
+            case 3:
+                INST_NAME("FCOMP ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FCMPS(v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FCMPD(v1, s0);
+                }
+                FCOM(x1, x2, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FSUB ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FSUBS(v1, v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FSUBD(v1, v1, s0);
+                }
+                break;
+            case 5:
+                INST_NAME("FSUBR ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FSUBS(v1, s0, v1);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FSUBD(v1, s0, v1);
+                }
+                break;
+            case 6:
+                INST_NAME("FDIV ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FDIVS(v1, v1, s0);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FDIVD(v1, v1, s0);
+                }
+                break;
+            case 7:
+                INST_NAME("FDIVR ST0, float[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
+                s0 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(s0, ed, fixedaddress);
+                if(ST_IS_F(0)) {
+                    FDIVS(v1, s0, v1);
+                } else {
+                    FCVT_D_S(s0, s0);
+                    FDIVD(v1, s0, v1);
+                }
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 05b89518..c97b2410 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -41,6 +41,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v2);
     MAYUSE(v1);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -462,92 +463,79 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             CALL(native_fcos, -1);
             x87_unstackcount(dyn, ninst, x1, i1);
             break;
-
-
-        case 0xD1:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-        case 0xE2:
-        case 0xE3:
-        case 0xE6:
-        case 0xE7:
-        case 0xEF:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FLD ST0, float[ED]");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box64_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v1, ed, fixedaddress);
-                    if(!ST_IS_F(0)) {
-                        FCVT_D_S(v1, v1);
-                    }
-                    break;
-                case 2:
-                    INST_NAME("FST float[ED], ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
-                    if(ST_IS_F(0))
-                        s0 = v1;
-                    else {
-                        s0 = fpu_get_scratch(dyn);
-                        FCVT_S_D(s0, v1);
-                    }
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VST32(s0, ed, fixedaddress);
-                    break;
-                case 3:
-                    INST_NAME("FSTP float[ED], ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    if(!ST_IS_F(0)) {
-                        FCVT_S_D(v1, v1);
-                    }
-                    VST32(v1, ed, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FLDENV Ed");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {
-                        MOVx_REG(x1, ed);
-                    }
-                    MOV32w(x2, 0);
-                    CALL(fpu_loadenv, -1);
-                    break;
-                case 5:
-                    INST_NAME("FLDCW Ew");
-                    GETEW(x1, 0);
-                    STRH_U12(x1, xEmu, offsetof(x64emu_t, cw));    // hopefully cw is not too far for an imm8
-                    break;
-                case 6:
-                    INST_NAME("FNSTENV Ed");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {
-                        MOVx_REG(x1, ed);
-                    }
-                    MOV32w(x2, 0);
-                    CALL(fpu_savenv, -1);
-                    break;
-                case 7:
-                    INST_NAME("FNSTCW Ew");
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    ed = x1;
-                    wb1 = 1;
-                    LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw));
-                    EWBACK;
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FLD ST0, float[ED]");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, box64_dynarec_x87double?NEON_CACHE_ST_D:NEON_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v1, ed, fixedaddress);
+                if(!ST_IS_F(0)) {
+                    FCVT_D_S(v1, v1);
+                }
+                break;
+            case 2:
+                INST_NAME("FST float[ED], ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                if(ST_IS_F(0))
+                    s0 = v1;
+                else {
+                    s0 = fpu_get_scratch(dyn);
+                    FCVT_S_D(s0, v1);
+                }
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VST32(s0, ed, fixedaddress);
+                break;
+            case 3:
+                INST_NAME("FSTP float[ED], ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                if(!ST_IS_F(0)) {
+                    FCVT_S_D(v1, v1);
+                }
+                VST32(v1, ed, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FLDENV Ed");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+                MOV32w(x2, 0);
+                CALL(fpu_loadenv, -1);
+                break;
+            case 5:
+                INST_NAME("FLDCW Ew");
+                GETEW(x1, 0);
+                STRH_U12(x1, xEmu, offsetof(x64emu_t, cw));    // hopefully cw is not too far for an imm8
+                break;
+            case 6:
+                INST_NAME("FNSTENV Ed");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {
+                    MOVx_REG(x1, ed);
+                }
+                MOV32w(x2, 0);
+                CALL(fpu_savenv, -1);
+                break;
+            case 7:
+                INST_NAME("FNSTCW Ew");
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                ed = x1;
+                wb1 = 1;
+                LDRH_U12(x1, xEmu, offsetof(x64emu_t, cw));
+                EWBACK;
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_da.c b/src/dynarec/arm64/dynarec_arm64_da.c
index e1735b87..b278ef02 100644
--- a/src/dynarec/arm64/dynarec_arm64_da.c
+++ b/src/dynarec/arm64/dynarec_arm64_da.c
@@ -42,6 +42,7 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(ed);
     MAYUSE(j64);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -134,106 +135,95 @@ uintptr_t dynarec64_DA(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
 
-        case 0xE4:
-        case 0xF0:
-        case 0xF1:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-        case 0xF8:
-        case 0xF9:
-        case 0xFD:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FIADD ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FADDD(v1, v1, v2);
-                    break;
-                case 1:
-                    INST_NAME("FIMUL ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FMULD(v1, v1, v2);
-                    break;
-                case 2:
-                    INST_NAME("FICOM ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    break;
-                case 3:
-                    INST_NAME("FICOMP ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FISUB ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FSUBD(v1, v1, v2);
-                    break;
-                case 5:
-                    INST_NAME("FISUBR ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FSUBD(v1, v2, v1);
-                    break;
-                case 6:
-                    INST_NAME("FIDIV ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FDIVD(v1, v1, v2);
-                    break;
-                case 7:
-                    INST_NAME("FIDIVR ST0, Ed");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v2, ed, fixedaddress);
-                    SXTL_32(v2, v2);    // i32 -> i64
-                    SCVTFDD(v2, v2);    // i64 -> double
-                    FDIVD(v1, v2, v1);
-                    break;
-            }
-    }
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FIADD ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FADDD(v1, v1, v2);
+                break;
+            case 1:
+                INST_NAME("FIMUL ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FMULD(v1, v1, v2);
+                break;
+            case 2:
+                INST_NAME("FICOM ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                break;
+            case 3:
+                INST_NAME("FICOMP ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FISUB ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FSUBD(v1, v1, v2);
+                break;
+            case 5:
+                INST_NAME("FISUBR ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FSUBD(v1, v2, v1);
+                break;
+            case 6:
+                INST_NAME("FIDIV ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FDIVD(v1, v1, v2);
+                break;
+            case 7:
+                INST_NAME("FIDIVR ST0, Ed");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v2, ed, fixedaddress);
+                SXTL_32(v2, v2);    // i32 -> i64
+                SCVTFDD(v2, v2);    // i64 -> double
+                FDIVD(v1, v2, v1);
+                break;
+        }
     return addr;
 }
 
diff --git a/src/dynarec/arm64/dynarec_arm64_db.c b/src/dynarec/arm64/dynarec_arm64_db.c
index 02eb0659..d9aa8e31 100644
--- a/src/dynarec/arm64/dynarec_arm64_db.c
+++ b/src/dynarec/arm64/dynarec_arm64_db.c
@@ -42,6 +42,7 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v1);
     MAYUSE(j64);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -176,204 +177,198 @@ uintptr_t dynarec64_DB(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             FCOMI(x1, x2);
             break;
 
-        case 0xE0:
-        case 0xE4:
-        case 0xE5:
-        case 0xE6:
-        case 0xE7:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FILD ST0, Ed");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    VLD32(v1, ed, fixedaddress);
-                    SXTL_32(v1, v1);    // i32 -> i64
-                    SCVTFDD(v1, v1);    // i64 -> double
-                    break;
-                case 1:
-                    INST_NAME("FISTTP Ed, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    s0 = fpu_get_scratch(dyn);
-                    if(arm64_frintts) {
-                        FRINT32ZD(s0, v1);
-                        FCVTZSwD(x5, s0);
-                        STW(x5, wback, fixedaddress);
-                    } else {
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        FRINTZD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        SQXTN_S_D(s0, s0);
-                        VST32(s0, wback, fixedaddress);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TBZ_MARK3(x5, FPSR_IOC);
-                        MOV32w(x5, 0x80000000);
-                        STW(x5, wback, fixedaddress);
-                        MARK3;
-                    }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FIST Ed, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    s0 = fpu_get_scratch(dyn);
-                    if(arm64_frintts) {
-                        FRINT32XD(s0, v1);
-                        FCVTZSwD(x5, s0);
-                        STW(x5, wback, fixedaddress);
-                    } else {
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        FRINTXD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        SQXTN_S_D(s0, s0);
-                        VST32(s0, wback, fixedaddress);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TBZ_MARK3(x5, FPSR_IOC);
-                        MOV32w(x5, 0x80000000);
-                        STW(x5, wback, fixedaddress);
-                        MARK3;
-                    }
-                    x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 3:
-                    INST_NAME("FISTP Ed, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
-                    s0 = fpu_get_scratch(dyn);
-                    if(arm64_frintts) {
-                        FRINT32XD(s0, v1);
-                        FCVTZSwD(x5, s0);
-                        STW(x5, wback, fixedaddress);
-                    } else {
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        FRINTXD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        SQXTN_S_D(s0, s0);
-                        VST32(s0, wback, fixedaddress);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TBZ_MARK3(x5, FPSR_IOC);
-                        MOV32w(x5, 0x80000000);
-                        STW(x5, wback, fixedaddress);
-                        MARK3;
-                    }
-                    x87_restoreround(dyn, ninst, u8);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 5:
-                    INST_NAME("FLD tbyte");
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FILD ST0, Ed");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                VLD32(v1, ed, fixedaddress);
+                SXTL_32(v1, v1);    // i32 -> i64
+                SCVTFDD(v1, v1);    // i64 -> double
+                break;
+            case 1:
+                INST_NAME("FISTTP Ed, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                s0 = fpu_get_scratch(dyn);
+                if(arm64_frintts) {
+                    FRINT32ZD(s0, v1);
+                    FCVTZSwD(x5, s0);
+                    STW(x5, wback, fixedaddress);
+                } else {
+                    MRS_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    FRINTZD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    VST32(s0, wback, fixedaddress);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_MARK3(x5, FPSR_IOC);
+                    MOV32w(x5, 0x80000000);
+                    STW(x5, wback, fixedaddress);
+                    MARK3;
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FIST Ed, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                s0 = fpu_get_scratch(dyn);
+                if(arm64_frintts) {
+                    FRINT32XD(s0, v1);
+                    FCVTZSwD(x5, s0);
+                    STW(x5, wback, fixedaddress);
+                } else {
+                    MRS_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    VST32(s0, wback, fixedaddress);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_MARK3(x5, FPSR_IOC);
+                    MOV32w(x5, 0x80000000);
+                    STW(x5, wback, fixedaddress);
+                    MARK3;
+                }
+                x87_restoreround(dyn, ninst, u8);
+                break;
+            case 3:
+                INST_NAME("FISTP Ed, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                u8 = x87_setround(dyn, ninst, x1, x2, x4); // x1 have the modified RPSCR reg
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<2, 3, rex, NULL, 0, 0);
+                s0 = fpu_get_scratch(dyn);
+                if(arm64_frintts) {
+                    FRINT32XD(s0, v1);
+                    FCVTZSwD(x5, s0);
+                    STW(x5, wback, fixedaddress);
+                } else {
+                    MRS_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                    MSR_fpsr(x5);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    SQXTN_S_D(s0, s0);
+                    VST32(s0, wback, fixedaddress);
+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                    TBZ_MARK3(x5, FPSR_IOC);
+                    MOV32w(x5, 0x80000000);
+                    STW(x5, wback, fixedaddress);
+                    MARK3;
+                }
+                x87_restoreround(dyn, ninst, u8);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 5:
+                INST_NAME("FLD tbyte");
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if((PK(0)==0xDB && ((PK(1)>>3)&7)==7) || (PK(0)>=0x40 && PK(0)<=0x4f && PK(1)==0xDB && ((PK(2)>>3)&7)==7)) {
+                    // the FLD is immediately followed by an FSTP
+                    LDRx_U12(x5, ed, 0);
+                    LDRH_U12(x6, ed, 8);
+                    // no persistent scratch register, so unroll both instructions here...
+                    MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
+                    nextop = F8;    // 0xDB or rex
+                    if(nextop>=0x40 && nextop<=0x4f) {
+                        rex.rex = nextop;
+                        nextop = F8;    //0xDB
+                    } else
+                        rex.rex = 0;
+                    nextop = F8;    //modrm
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if((PK(0)==0xDB && ((PK(1)>>3)&7)==7) || (PK(0)>=0x40 && PK(0)<=0x4f && PK(1)==0xDB && ((PK(2)>>3)&7)==7)) {
-                        // the FLD is immediatly followed by an FSTP
-                        LDRx_U12(x5, ed, 0);
-                        LDRH_U12(x6, ed, 8);
-                        // no persistant scratch register, so unrool both instruction here...
-                        MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
-                        nextop = F8;    // 0xDB or rex
-                        if(nextop>=0x40 && nextop<=0x4f) {
-                            rex.rex = nextop;
-                            nextop = F8;    //0xDB
-                        } else
-                            rex.rex = 0;
-                        nextop = F8;    //modrm
-                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        STRx_U12(x5, ed, 0);
-                        STRH_U12(x6, ed, 8);
-                    } else {
-                        if(box64_x87_no80bits) {
-                            X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
-                            VLDR64_U12(v1, ed, fixedaddress);
-                        } else {
-                            if(ed!=x1) {
-                                MOVx_REG(x1, ed);
-                            }
-                            X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3);
-                            CALL(native_fld, -1);
-                        }
-                    }
-                    break;
-                case 7:
-                    INST_NAME("FSTP tbyte");
+                    STRx_U12(x5, ed, 0);
+                    STRH_U12(x6, ed, 8);
+                } else {
                     if(box64_x87_no80bits) {
-                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                        VST64(v1, wback, fixedaddress);
+                        X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_D);
+                        VLDR64_U12(v1, ed, fixedaddress);
                     } else {
-                        #if 0
-                        x87_forget(dyn, ninst, x1, x3, 0);
-                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
                         if(ed!=x1) {
                             MOVx_REG(x1, ed);
                         }
-                        CALL(native_fstp, -1);
-                        #else
-                        // Painfully long, straight conversion from the C code, shoud be optimized
-                        v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                        FMOVxD(x1, v1);
-                        // do special value first
-                        TSTx_mask(x1, 1, 0b00000, 0b111110);    //0x7fffffffffffffffL
-                        B_MARK(cNE);
-                        // Zero
-                        LSRx(x3, x1, 63-15);    //x3 = sign+exp
-                        MOVZw(x5, 0);           // x5 = mantisse
-                        B_MARK3_nocond;
-                        MARK;
-                        // get sign, in main ouput x5 for sign+exp
-                        ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
-                        LSRx(x5, x5, 63-15);    // x5 = sign
-                        // get exp
-                        LSRx(x3, x1, 52);       // x3 = exp11
-                        ANDw_mask(x3, x3, 0, 0b1010);    //0x7ff
-                        MOV32w(x4, 0x7ff);
-                        CMPSw_REG(x3, x4);
-                        B_MARK2(cNE);
-                        // NaN and Infinite
-                        ORRw_mask(x3, x5, 0, 0b1110);    //x3 = sign | 0x7fff
-                        TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
-                        ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
-                        ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
-                        CSELx(x5, x5, x4, cEQ);     // x5 = mantisse
-                        B_MARK3_nocond;
-                        MARK2;
-                        // regular / denormals
-                        ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
-                        LSLx_IMM(x1, x1, 11);   //x1 = mantisse missing "1"
-                        MOVZw(x4, 16383-1023);  //BIAS80 - BIAS64
-                        CBZw(x3, 4+3*4);        // exp11 == 0?
-                        // normals
-                        ADDw_REG(x3, x3, x4);   // x3 = exp16
-                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp
-                        ORRx_mask(x5, x1, 1, 1, 0);    //0x8000000000000000 x5 = mantisse
-                        B_MARK3_nocond;
-                        // denormals
-                        CLZx(x6, x1);
-                        ADDw_U12(x6, x6, 1);    // "one"
-                        SUBw_REG(x3, x4, x6);   // x3 = exp16
-                        ORRw_REG(x3, x3, x5);   // x3 = sign | exp16
-                        LSLx_REG(x5, x1, x6);   // x5 = mantisse
-                        MARK3;
-                        STRx_U12(x5, wback, 0);
-                        STRH_U12(x3, wback, 8);
-                        #endif
+                        X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3);
+                        CALL(native_fld, -1);
                     }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+                }
+                break;
+            case 7:
+                INST_NAME("FSTP tbyte");
+                if(box64_x87_no80bits) {
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                    VST64(v1, wback, fixedaddress);
+                } else {
+                    #if 0
+                    x87_forget(dyn, ninst, x1, x3, 0);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                    if(ed!=x1) {
+                        MOVx_REG(x1, ed);
+                    }
+                    CALL(native_fstp, -1);
+                    #else
+                    // Painfully long, straight conversion from the C code, should be optimized
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                    FMOVxD(x1, v1);
+                    // do special value first
+                    TSTx_mask(x1, 1, 0b00000, 0b111110);    //0x7fffffffffffffffL
+                    B_MARK(cNE);
+                    // Zero
+                    LSRx(x3, x1, 63-15);    //x3 = sign+exp
+                    MOVZw(x5, 0);           // x5 = mantisse
+                    B_MARK3_nocond;
+                    MARK;
+                    // get sign, in main output x5 for sign+exp
+                    ANDx_mask(x5, x1, 1, 1, 0); //0x8000000000000000
+                    LSRx(x5, x5, 63-15);    // x5 = sign
+                    // get exp
+                    LSRx(x3, x1, 52);       // x3 = exp11
+                    ANDw_mask(x3, x3, 0, 0b1010);    //0x7ff
+                    MOV32w(x4, 0x7ff);
+                    CMPSw_REG(x3, x4);
+                    B_MARK2(cNE);
+                    // NaN and Infinite
+                    ORRw_mask(x3, x5, 0, 0b1110);    //x3 = sign | 0x7fff
+                    TSTx_mask(x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                    ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
+                    ORRx_mask(x4, xZR, 1, 0b10, 0b01); //0xc000000000000000
+                    CSELx(x5, x5, x4, cEQ);     // x5 = mantisse
+                    B_MARK3_nocond;
+                    MARK2;
+                    // regular / denormals
+                    ANDx_mask(x1, x1, 1, 0, 0b110011); //0x000fffffffffffffL
+                    LSLx_IMM(x1, x1, 11);   //x1 = mantisse missing "1"
+                    MOVZw(x4, 16383-1023);  //BIAS80 - BIAS64
+                    CBZw(x3, 4+3*4);        // exp11 == 0?
+                    // normals
+                    ADDw_REG(x3, x3, x4);   // x3 = exp16
+                    ORRw_REG(x3, x3, x5);   // x3 = sign | exp
+                    ORRx_mask(x5, x1, 1, 1, 0);    //0x8000000000000000 x5 = mantisse
+                    B_MARK3_nocond;
+                    // denormals
+                    CLZx(x6, x1);
+                    ADDw_U12(x6, x6, 1);    // "one"
+                    SUBw_REG(x3, x4, x6);   // x3 = exp16
+                    ORRw_REG(x3, x3, x5);   // x3 = sign | exp16
+                    LSLx_REG(x5, x1, x6);   // x5 = mantisse
+                    MARK3;
+                    STRx_U12(x5, wback, 0);
+                    STRH_U12(x3, wback, 8);
+                    #endif
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_dc.c b/src/dynarec/arm64/dynarec_arm64_dc.c
index 67be2c25..a06e765b 100644
--- a/src/dynarec/arm64/dynarec_arm64_dc.c
+++ b/src/dynarec/arm64/dynarec_arm64_dc.c
@@ -36,6 +36,7 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v2);
     MAYUSE(v1);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -177,75 +178,77 @@ uintptr_t dynarec64_DC(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             break;
         default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FADD ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FADDD(v1, v1, v2);
-                    break;
-                case 1:
-                    INST_NAME("FMUL ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FMULD(v1, v1, v2);
-                    break;
-                case 2:
-                    INST_NAME("FCOM ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    break;
-                case 3:
-                    INST_NAME("FCOMP ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FSUB ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FSUBD(v1, v1, v2);
-                    break;
-                case 5:
-                    INST_NAME("FSUBR ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FSUBD(v1, v2, v1);
-                    break;
-                case 6:
-                    INST_NAME("FDIV ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FDIVD(v1, v1, v2);
-                    break;
-                case 7:
-                    INST_NAME("FDIVR ST0, double[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v2, wback, fixedaddress);
-                    FDIVD(v1, v2, v1);
-                    break;
-            }
-    }
+            DEFAULT;
+            break;
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FADD ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FADDD(v1, v1, v2);
+                break;
+            case 1:
+                INST_NAME("FMUL ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FMULD(v1, v1, v2);
+                break;
+            case 2:
+                INST_NAME("FCOM ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                break;
+            case 3:
+                INST_NAME("FCOMP ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FSUB ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FSUBD(v1, v1, v2);
+                break;
+            case 5:
+                INST_NAME("FSUBR ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FSUBD(v1, v2, v1);
+                break;
+            case 6:
+                INST_NAME("FDIV ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FDIVD(v1, v1, v2);
+                break;
+            case 7:
+                INST_NAME("FDIVR ST0, double[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v2, wback, fixedaddress);
+                FDIVD(v1, v2, v1);
+                break;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index b8ab6965..ed7c0ecf 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -40,6 +40,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v1);
     MAYUSE(j64);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -129,121 +130,96 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
 
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-        case 0xF0:
-        case 0xF1:
-        case 0xF2:
-        case 0xF3:
-        case 0xF4:
-        case 0xF5:
-        case 0xF6:
-        case 0xF7:
-        case 0xF8:
-        case 0xF9:
-        case 0xFA:
-        case 0xFB:
-        case 0xFC:
-        case 0xFD:
-        case 0xFE:
-        case 0xFF:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FLD double");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x3, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v1, ed, fixedaddress);
-                    break;
-                case 1:
-                    INST_NAME("FISTTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    if(ST_IS_I64(0)) {
-                        VST64(v1, ed, fixedaddress);
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FLD double");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x3, NEON_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v1, ed, fixedaddress);
+                break;
+            case 1:
+                INST_NAME("FISTTP i64, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                if(ST_IS_I64(0)) {
+                    VST64(v1, ed, fixedaddress);
+                } else {
+                    s0 = fpu_get_scratch(dyn);
+                    if(arm64_frintts) {
+                        FRINT64ZD(s0, v1);
+                        FCVTZSxD(x2, s0);
+                        STRx_U12(x2, ed, fixedaddress);
                     } else {
-                        s0 = fpu_get_scratch(dyn);
-                        if(arm64_frintts) {
-                            FRINT64ZD(s0, v1);
-                            FCVTZSxD(x2, s0);
-                            STRx_U12(x2, ed, fixedaddress);
-                        } else {
-                            MRS_fpsr(x5);
-                            BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                            MSR_fpsr(x5);
-                            FRINTRRD(s0, v1, 3);
-                            FCVTZSxD(x2, s0);
-                            STx(x2, ed, fixedaddress);
-                            MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                            TBZ_MARK3(x5, FPSR_IOC);
-                            ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
-                            STx(x5, ed, fixedaddress);
-                            MARK3;
-                        }
+                        MRS_fpsr(x5);
+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                        MSR_fpsr(x5);
+                        FRINTRRD(s0, v1, 3);
+                        FCVTZSxD(x2, s0);
+                        STx(x2, ed, fixedaddress);
+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                        TBZ_MARK3(x5, FPSR_IOC);
+                        ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
+                        STx(x5, ed, fixedaddress);
+                        MARK3;
                     }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FST double");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VST64(v1, ed, fixedaddress);
-                    break;
-                case 3:
-                    INST_NAME("FSTP double");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VST64(v1, ed, fixedaddress);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FRSTOR m108byte");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {MOVx_REG(x1, ed);}
-                    CALL(native_frstor, -1);
-                    break;
-                case 6:
-                    INST_NAME("FNSAVE m108byte");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {MOVx_REG(x1, ed);}
-                    CALL(native_fsave, -1);
-                    CALL(reset_fpu, -1);
-                    break;
-                case 7:
-                    INST_NAME("FNSTSW m2byte");
-                    //fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x4, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
-                    LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
-                    if(dyn->n.x87stack) {
-                        // update top
-                        if(dyn->n.x87stack>0) {
-                            SUBw_U12(x2, x2, dyn->n.x87stack);
-                        } else {
-                            ADDw_U12(x2, x2, -dyn->n.x87stack);
-                        }
-                        ANDw_mask(x2, x2, 0, 2);
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FST double");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VST64(v1, ed, fixedaddress);
+                break;
+            case 3:
+                INST_NAME("FSTP double");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VST64(v1, ed, fixedaddress);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FRSTOR m108byte");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {MOVx_REG(x1, ed);}
+                CALL(native_frstor, -1);
+                break;
+            case 6:
+                INST_NAME("FNSAVE m108byte");
+                MESSAGE(LOG_DUMP, "Need Optimization\n");
+                fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {MOVx_REG(x1, ed);}
+                CALL(native_fsave, -1);
+                CALL(reset_fpu, -1);
+                break;
+            case 7:
+                INST_NAME("FNSTSW m2byte");
+                //fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x4, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                LDRw_U12(x2, xEmu, offsetof(x64emu_t, top));
+                LDRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
+                if(dyn->n.x87stack) {
+                    // update top
+                    if(dyn->n.x87stack>0) {
+                        SUBw_U12(x2, x2, dyn->n.x87stack);
+                    } else {
+                        ADDw_U12(x2, x2, -dyn->n.x87stack);
                     }
-                    BFIw(x3, x2, 11, 3); // inject TOP at bit 11 (3 bits)
-                    STRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
-                    STH(x3, ed, fixedaddress);   // store whole sw flags
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+                    ANDw_mask(x2, x2, 0, 2);
+                }
+                BFIw(x3, x2, 11, 3); // inject TOP at bit 11 (3 bits)
+                STRH_U12(x3, xEmu, offsetof(x64emu_t, sw));
+                STH(x3, ed, fixedaddress);   // store whole sw flags
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_de.c b/src/dynarec/arm64/dynarec_arm64_de.c
index c46b0817..660d667c 100644
--- a/src/dynarec/arm64/dynarec_arm64_de.c
+++ b/src/dynarec/arm64/dynarec_arm64_de.c
@@ -36,6 +36,7 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v2);
     MAYUSE(v1);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -177,109 +178,102 @@ uintptr_t dynarec64_DE(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
-        case 0xD8:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
+        default:
             DEFAULT;
             break;
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FIADD ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FADDD(v1, v1, v2);
-                    break;
-                case 1:
-                    INST_NAME("FIMUL ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FMULD(v1, v1, v2);
-                    break;
-                case 2:
-                    INST_NAME("FICOM ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    break;
-                case 3:
-                    INST_NAME("FICOMP ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FCMPD(v1, v2);
-                    FCOM(x1, x2, x3);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 4:
-                    INST_NAME("FISUB ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FSUBD(v1, v1, v2);
-                    break;
-                case 5:
-                    INST_NAME("FISUBR ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FSUBD(v1, v2, v1);
-                    break;
-                case 6:
-                    INST_NAME("FIDIV ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FDIVD(v1, v1, v2);
-                    break;
-                case 7:
-                    INST_NAME("FIDIVR ST0, word[ED]");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
-                    v2 = fpu_get_scratch(dyn);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    VLD16(v2, wback, fixedaddress);
-                    SXTL_16(v2, v2);
-                    SXTL_32(v2, v2);
-                    SCVTFDD(v2, v2);
-                    FDIVD(v1, v2, v1);
-                    break;
-            }
-    }
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FIADD ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FADDD(v1, v1, v2);
+                break;
+            case 1:
+                INST_NAME("FIMUL ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FMULD(v1, v1, v2);
+                break;
+            case 2:
+                INST_NAME("FICOM ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                break;
+            case 3:
+                INST_NAME("FICOMP ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FCMPD(v1, v2);
+                FCOM(x1, x2, x3);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 4:
+                INST_NAME("FISUB ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FSUBD(v1, v1, v2);
+                break;
+            case 5:
+                INST_NAME("FISUBR ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FSUBD(v1, v2, v1);
+                break;
+            case 6:
+                INST_NAME("FIDIV ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FDIVD(v1, v1, v2);
+                break;
+            case 7:
+                INST_NAME("FIDIVR ST0, word[ED]");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v2 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                VLD16(v2, wback, fixedaddress);
+                SXTL_16(v2, v2);
+                SXTL_32(v2, v2);
+                SCVTFDD(v2, v2);
+                FDIVD(v1, v2, v1);
+                break;
+        }
     return addr;
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c
index 5853fd39..dadbc858 100644
--- a/src/dynarec/arm64/dynarec_arm64_df.c
+++ b/src/dynarec/arm64/dynarec_arm64_df.c
@@ -41,6 +41,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     MAYUSE(v1);
     MAYUSE(j64);
 
+    if(MODREG)
     switch(nextop) {
         case 0xC0:
         case 0xC1:
@@ -114,283 +115,243 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
 
-        case 0xC8:
-        case 0xC9:
-        case 0xCA:
-        case 0xCB:
-        case 0xCC:
-        case 0xCD:
-        case 0xCE:
-        case 0xCF:
-        case 0xD0:
-        case 0xD1:
-        case 0xD2:
-        case 0xD3:
-        case 0xD4:
-        case 0xD5:
-        case 0xD6:
-        case 0xD7:
-        case 0xD8:
-        case 0xD9:
-        case 0xDA:
-        case 0xDB:
-        case 0xDC:
-        case 0xDD:
-        case 0xDE:
-        case 0xDF:
-        case 0xE1:
-        case 0xE2:
-        case 0xE3:
-        case 0xE4:
-        case 0xE5:
-        case 0xE6:
-        case 0xE7:
-        case 0xF8:
-        case 0xF9:
-        case 0xFA:
-        case 0xFB:
-        case 0xFC:
-        case 0xFD:
-        case 0xFE:
-        case 0xFF:
+        default:
             DEFAULT;
             break;
-
-        default:
-            switch((nextop>>3)&7) {
-                case 0:
-                    INST_NAME("FILD ST0, Ew");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    LDSHw(x1, wback, fixedaddress);
-                    if(ST_IS_F(0)) {
-                        SCVTFSw(v1, x1);
-                    } else {
-                        SCVTFDw(v1, x1);
-                    }
-                    break;
-                case 1:
-                    INST_NAME("FISTTP Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    ed = x1;
-                    s0 = fpu_get_scratch(dyn);
-                    #if 0
-                    // this version needs ARM v8.5, and doesn't handle saturation for 32bits integer not fitting 16bits
-                    FRINT32ZD(s0, v1);
-                    // no saturation instruction on Arm, so using NEON
+    } else
+        switch((nextop>>3)&7) {
+            case 0:
+                INST_NAME("FILD ST0, Ew");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                LDSHw(x1, wback, fixedaddress);
+                if(ST_IS_F(0)) {
+                    SCVTFSw(v1, x1);
+                } else {
+                    SCVTFDw(v1, x1);
+                }
+                break;
+            case 1:
+                INST_NAME("FISTTP Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                ed = x1;
+                s0 = fpu_get_scratch(dyn);
+                #if 0
+                // this version needs ARM v8.5, and doesn't handle saturation for 32bits integer not fitting 16bits
+                FRINT32ZD(s0, v1);
+                // no saturation instruction on Arm, so using NEON
+                VFCVTZSd(s0, s0);
+                SQXTN_S_D(s0, s0);
+                SQXTN_H_S(s0, s0);
+                VST16(s0, wback, fixedaddress);
+                #else
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                BFCw(x5, FPSR_QC, 1);   // reset QC bit
+                MSR_fpsr(x5);
+                if(ST_IS_F(0)) {
+                    VFCVTZSs(s0, v1);
+                } else {
+                    VFCVTZSd(s0, v1);
+                    SQXTN_S_D(s0, s0);
+                }
+                VMOVSto(x3, s0, 0);
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBNZ_MARK2(x5, FPSR_IOC);
+                SXTHw(x5, x3);  // check if 16bits value is fine
+                SUBw_REG(x5, x5, x3);
+                CBZw_MARK3(x5);
+                MARK2;
+                MOV32w(x3, 0x8000);
+                MARK3;
+                STH(x3, wback, fixedaddress);
+                #endif
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 2:
+                INST_NAME("FIST Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                ed = x1;
+                s0 = fpu_get_scratch(dyn);
+                #if 0
+                FRINT32XD(s0, v1);
+                // no saturation instruction on Arm, so using NEON
+                VFCVTZSd(s0, s0);
+                SQXTN_S_D(s0, s0);
+                SQXTN_H_S(s0, s0);
+                VST16(s0, wback, fixedaddress);
+                #else
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                BFCw(x5, FPSR_QC, 1);   // reset QC bit
+                MSR_fpsr(x5);
+                if(ST_IS_F(0)) {
+                    FRINTXS(s0, v1);
+                    VFCVTZSs(s0, s0);
+                } else {
+                    FRINTXD(s0, v1);
                     VFCVTZSd(s0, s0);
                     SQXTN_S_D(s0, s0);
-                    SQXTN_H_S(s0, s0);
-                    VST16(s0, wback, fixedaddress);
-                    #else
-                    MRS_fpsr(x5);
-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                    BFCw(x5, FPSR_QC, 1);   // reset QC bit
-                    MSR_fpsr(x5);
-                    if(ST_IS_F(0)) {
-                        VFCVTZSs(s0, v1);
-                    } else {
-                        VFCVTZSd(s0, v1);
-                        SQXTN_S_D(s0, s0);
-                    }
-                    VMOVSto(x3, s0, 0);
-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                    TBNZ_MARK2(x5, FPSR_IOC);
-                    SXTHw(x5, x3);  // check if 16bits value is fine
-                    SUBw_REG(x5, x5, x3);
-                    CBZw_MARK3(x5);
-                    MARK2;
-                    MOV32w(x3, 0x8000);
-                    MARK3;
-                    STH(x3, wback, fixedaddress);
-                    #endif
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 2:
-                    INST_NAME("FIST Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
-                    u8 = x87_setround(dyn, ninst, x1, x2, x4);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    ed = x1;
-                    s0 = fpu_get_scratch(dyn);
-                    #if 0
-                    FRINT32XD(s0, v1);
-                    // no saturation instruction on Arm, so using NEON
+                }
+                VMOVSto(x3, s0, 0);
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBNZ_MARK2(x5, FPSR_IOC);
+                SXTHw(x5, x3);  // check if 16bits value is fine
+                SUBw_REG(x5, x5, x3);
+                CBZw_MARK3(x5);
+                MARK2;
+                MOV32w(x3, 0x8000);
+                MARK3;
+                STH(x3, wback, fixedaddress);
+                #endif
+                x87_restoreround(dyn, ninst, u8);
+                break;
+            case 3:
+                INST_NAME("FISTP Ew, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                u8 = x87_setround(dyn, ninst, x1, x2, x4);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
+                ed = x1;
+                s0 = fpu_get_scratch(dyn);
+                #if 0
+                FRINT32XD(s0, v1);
+                // no saturation instruction on Arm, so using NEON
+                VFCVTZSd(s0, s0);
+                SQXTN_S_D(s0, s0);
+                SQXTN_H_S(s0, s0);
+                VST16(s0, wback, fixedaddress);
+                #else
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                MSR_fpsr(x5);
+                if(ST_IS_F(0)) {
+                    FRINTXS(s0, v1);
+                    VFCVTZSs(s0, s0);
+                } else {
+                    FRINTXD(s0, v1);
                     VFCVTZSd(s0, s0);
                     SQXTN_S_D(s0, s0);
-                    SQXTN_H_S(s0, s0);
-                    VST16(s0, wback, fixedaddress);
-                    #else
-                    MRS_fpsr(x5);
-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                    BFCw(x5, FPSR_QC, 1);   // reset QC bit
-                    MSR_fpsr(x5);
-                    if(ST_IS_F(0)) {
-                        FRINTXS(s0, v1);
-                        VFCVTZSs(s0, s0);
-                    } else {
-                        FRINTXD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        SQXTN_S_D(s0, s0);
+                }
+                VMOVSto(x3, s0, 0);
+                MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                TBNZ_MARK2(x5, FPSR_IOC);
+                SXTHw(x5, x3);  // check if 16bits value is fine
+                SUBw_REG(x5, x5, x3);
+                CBZw_MARK3(x5);
+                MARK2;
+                MOV32w(x3, 0x8000);
+                MARK3;
+                STH(x3, wback, fixedaddress);
+                #endif
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                x87_restoreround(dyn, ninst, u8);
+                break;
+            case 4:
+                INST_NAME("FBLD ST0, tbytes");
+                X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {MOVx_REG(x1, ed);}
+                CALL(fpu_fbld, -1);
+                break;
+            case 5:
+                INST_NAME("FILD ST0, i64");
+                X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_I64);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                VLD64(v1, wback, fixedaddress);
+                if(!ST_IS_I64(0)) {
+                    if(rex.is32bits) {
+                        // need to also feed the STll stuff...
+                        ADDx_U12(x4, xEmu, offsetof(x64emu_t, fpu_ll));
+                        LDRw_U12(x1, xEmu, offsetof(x64emu_t, top));
+                        int a = 0 - dyn->n.x87stack;
+                        if(a) {
+                            if(a<0) {
+                                SUBw_U12(x1, x1, -a);
+                            } else {
+                                ADDw_U12(x1, x1, a);
+                            }
+                            ANDw_mask(x1, x1, 0, 2); //mask=7
+                        }
+                        ADDx_REG_LSL(x1, x4, x1, 4);    // fpu_ll is 2 i64
+                        VSTR64_U12(v1, x1, 8);  // ll
                     }
-                    VMOVSto(x3, s0, 0);
-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                    TBNZ_MARK2(x5, FPSR_IOC);
-                    SXTHw(x5, x3);  // check if 16bits value is fine
-                    SUBw_REG(x5, x5, x3);
-                    CBZw_MARK3(x5);
-                    MARK2;
-                    MOV32w(x3, 0x8000);
-                    MARK3;
-                    STH(x3, wback, fixedaddress);
-                    #endif
-                    x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 3:
-                    INST_NAME("FISTP Ew, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_F);
+                    SCVTFDD(v1, v1);
+                    if(rex.is32bits) {
+                        VSTR64_U12(v1, x1, 0);  // ref
+                    }
+                }
+                break;
+            case 6:
+                INST_NAME("FBSTP tbytes, ST0");
+                i1 = x87_stackcount(dyn, ninst, x1);
+                x87_forget(dyn, ninst, x1, x2, 0);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                if(ed!=x1) {MOVx_REG(x1, ed);}
+                CALL(fpu_fbst, -1);
+                x87_unstackcount(dyn, ninst, x1, i1);
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            case 7:
+                INST_NAME("FISTP i64, ST0");
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
+                if(!ST_IS_I64(0)) {
                     u8 = x87_setround(dyn, ninst, x1, x2, x4);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);
-                    ed = x1;
-                    s0 = fpu_get_scratch(dyn);
+                }
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
+                ed = x1;
+                s0 = fpu_get_scratch(dyn);
+                if(ST_IS_I64(0)) {
+                    VST64(v1, wback, fixedaddress);
+                } else {
                     #if 0
-                    FRINT32XD(s0, v1);
-                    // no saturation instruction on Arm, so using NEON
+                    FRINT64XD(s0, v1);
                     VFCVTZSd(s0, s0);
-                    SQXTN_S_D(s0, s0);
-                    SQXTN_H_S(s0, s0);
-                    VST16(s0, wback, fixedaddress);
+                    VSTR64_U12(s0, wback, fixedaddress);
                     #else
+                    if(rex.is32bits) {
+                        // need to check STll first...
+                        ADDx_U12(x5, xEmu, offsetof(x64emu_t, fpu_ll));
+                        LDRw_U12(x1, xEmu, offsetof(x64emu_t, top));
+                        VMOVQDto(x3, v1, 0);
+                        int a = 0 - dyn->n.x87stack;
+                        if(a) {
+                            if(a<0) {
+                                SUBw_U12(x1, x1, -a);
+                            } else {
+                                ADDw_U12(x1, x1, a);
+                            }
+                            ANDw_mask(x1, x1, 0, 2); //mask=7
+                        }
+                        ADDx_REG_LSL(x1, x5, x1, 4);    // fpu_ll is 2 i64
+                        LDRx_U12(x5, x1, 0);  // ref
+                        SUBx_REG(x5, x5, x3);
+                        CBNZx_MARK2(x5);
+                        LDRx_U12(x5, x1, 8);  // ll
+                        STx(x5, wback, fixedaddress);
+                        B_MARK3(c__);
+                        MARK2;
+                    }
                     MRS_fpsr(x5);
                     BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
                     MSR_fpsr(x5);
-                    if(ST_IS_F(0)) {
-                        FRINTXS(s0, v1);
-                        VFCVTZSs(s0, s0);
-                    } else {
-                        FRINTXD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        SQXTN_S_D(s0, s0);
-                    }
-                    VMOVSto(x3, s0, 0);
+                    FRINTXD(s0, v1);
+                    VFCVTZSd(s0, s0);
+                    VST64(s0, wback, fixedaddress);
                     MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                    TBNZ_MARK2(x5, FPSR_IOC);
-                    SXTHw(x5, x3);  // check if 16bits value is fine
-                    SUBw_REG(x5, x5, x3);
-                    CBZw_MARK3(x5);
-                    MARK2;
-                    MOV32w(x3, 0x8000);
+                    TBZ_MARK3(x5, FPSR_IOC);
+                    ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
+                    STx(x5, wback, fixedaddress);
                     MARK3;
-                    STH(x3, wback, fixedaddress);
                     #endif
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
                     x87_restoreround(dyn, ninst, u8);
-                    break;
-                case 4:
-                    INST_NAME("FBLD ST0, tbytes");
-                    X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {MOVx_REG(x1, ed);}
-                    CALL(fpu_fbld, -1);
-                    break;
-                case 5:
-                    INST_NAME("FILD ST0, i64");
-                    X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, NEON_CACHE_ST_I64);
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    VLD64(v1, wback, fixedaddress);
-                    if(!ST_IS_I64(0)) {
-                        if(rex.is32bits) {
-                            // need to also feed the STll stuff...
-                            ADDx_U12(x4, xEmu, offsetof(x64emu_t, fpu_ll));
-                            LDRw_U12(x1, xEmu, offsetof(x64emu_t, top));
-                            int a = 0 - dyn->n.x87stack;
-                            if(a) {
-                                if(a<0) {
-                                    SUBw_U12(x1, x1, -a);
-                                } else {
-                                    ADDw_U12(x1, x1, a);
-                                }
-                                ANDw_mask(x1, x1, 0, 2); //mask=7
-                            }
-                            ADDx_REG_LSL(x1, x4, x1, 4);    // fpu_ll is 2 i64
-                            VSTR64_U12(v1, x1, 8);  // ll
-                        }
-                        SCVTFDD(v1, v1);
-                        if(rex.is32bits) {
-                            VSTR64_U12(v1, x1, 0);  // ref
-                        }
-                    }
-                    break;
-                case 6:
-                    INST_NAME("FBSTP tbytes, ST0");
-                    i1 = x87_stackcount(dyn, ninst, x1);
-                    x87_forget(dyn, ninst, x1, x2, 0);
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
-                    if(ed!=x1) {MOVx_REG(x1, ed);}
-                    CALL(fpu_fbst, -1);
-                    x87_unstackcount(dyn, ninst, x1, i1);
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                case 7:
-                    INST_NAME("FISTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
-                    if(!ST_IS_I64(0)) {
-                        u8 = x87_setround(dyn, ninst, x1, x2, x4);
-                    }
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    ed = x1;
-                    s0 = fpu_get_scratch(dyn);
-                    if(ST_IS_I64(0)) {
-                        VST64(v1, wback, fixedaddress);
-                    } else {
-                        #if 0
-                        FRINT64XD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        VSTR64_U12(s0, wback, fixedaddress);
-                        #else
-                        if(rex.is32bits) {
-                            // need to check STll first...
-                            ADDx_U12(x5, xEmu, offsetof(x64emu_t, fpu_ll));
-                            LDRw_U12(x1, xEmu, offsetof(x64emu_t, top));
-                            VMOVQDto(x3, v1, 0);
-                            int a = 0 - dyn->n.x87stack;
-                            if(a) {
-                                if(a<0) {
-                                    SUBw_U12(x1, x1, -a);
-                                } else {
-                                    ADDw_U12(x1, x1, a);
-                                }
-                                ANDw_mask(x1, x1, 0, 2); //mask=7
-                            }
-                            ADDx_REG_LSL(x1, x5, x1, 4);    // fpu_ll is 2 i64
-                            LDRx_U12(x5, x1, 0);  // ref
-                            SUBx_REG(x5, x5, x3);
-                            CBNZx_MARK2(x5);
-                            LDRx_U12(x5, x1, 8);  // ll
-                            STx(x5, wback, fixedaddress);
-                            B_MARK3(c__);
-                            MARK2;
-                        }
-                        MRS_fpsr(x5);
-                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                        MSR_fpsr(x5);
-                        FRINTXD(s0, v1);
-                        VFCVTZSd(s0, s0);
-                        VST64(s0, wback, fixedaddress);
-                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                        TBZ_MARK3(x5, FPSR_IOC);
-                        ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
-                        STx(x5, wback, fixedaddress);
-                        MARK3;
-                        #endif
-                        x87_restoreround(dyn, ninst, u8);
-                    }
-                    X87_POP_OR_FAIL(dyn, ninst, x3);
-                    break;
-                default:
-                    DEFAULT;
-            }
-    }
+                }
+                X87_POP_OR_FAIL(dyn, ninst, x3);
+                break;
+            default:
+                DEFAULT;
+        }
     return addr;
 }