about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-10-27 17:06:04 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-10-27 17:07:04 +0200
commit 8a1e4cdf306ff3f57a8603004e068549248db29b (patch)
tree 994cd08a10e497972d8621b6673be4f75c7934e9 /src
parent e574dca3737059c013e82238eea819fc27686504 (diff)
download box64-8a1e4cdf306ff3f57a8603004e068549248db29b.tar.gz
box64-8a1e4cdf306ff3f57a8603004e068549248db29b.zip
[ARM64_DYNAREC] Ported fprem/fprem1 from box86 (fix camera issues in FlatOut / FlatOut 2)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d9.c 46
-rw-r--r-- src/emu/x64emu_private.h 3
2 files changed, 48 insertions, 1 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 20563efd..cf76770d 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -236,11 +236,34 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xF5:
             INST_NAME("FPREM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            #if 0
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem1, -1);
             x87_unstackcount(dyn, ninst, x1, i1);
+            #else
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+            v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
+            s0 = fpu_get_scratch(dyn);
+            FDIVD(s0, v1, v2);
+            FRINTRRD(s0, s0, 0b00); // Nearest == TieToEven?
+            FCVTZSxD(x4, s0);
+            FMULD(s0, s0, v2);
+            FSUBD(v1, v1, s0);
+            LDRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            // set C2 = 0
+            BFCw(x1, 10, 1);
+            // set C1 = Q0
+            BFIw(x1, x4, 9, 1);
+            // set C3 = Q1
+            LSRx_IMM(x4, x4, 1);
+            BFIw(x1, x4, 14, 1);
+            // Set C0 = Q2
+            LSRx(x4, x4, 1);
+            BFIw(x1, x4, 8, 1);
+            STRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            #endif
             break;
         case 0xF6:
             INST_NAME("FDECSTP");
@@ -260,12 +283,35 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0xF8:
             INST_NAME("FPREM");
+            #if 0
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem, -1);
             x87_unstackcount(dyn, ninst, x1, i1);
+            #else
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+            v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
+            s0 = fpu_get_scratch(dyn);
+            FDIVD(s0, v1, v2);
+            FRINTZD(s0, s0);
+            FCVTZSxD(x4, s0);
+            FMULD(s0, s0, v2);
+            FSUBD(v1, v1, s0);
+            LDRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            // set C2 = 0
+            BFCw(x1, 10, 1);
+            // set C1 = Q0
+            BFIw(x1, x4, 9, 1);
+            // set C3 = Q1
+            LSRx_IMM(x4, x4, 1);
+            BFIw(x1, x4, 14, 1);
+            // Set C0 = Q2
+            LSRx(x4, x4, 1);
+            BFIw(x1, x4, 8, 1);
+            STRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            #endif
             break;
         case 0xF9:
             INST_NAME("FYL2XP1");
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index c5c4f4d9..aa6584a7 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -61,10 +61,11 @@ typedef struct x64emu_s {
     // fpu / mmx
 	mmx87_regs_t x87[8];
 	mmx87_regs_t mmx[8];
-	x87control_t cw;
 	x87flags_t  sw;
 	uint32_t    top;        // top is part of sw, but it's faster to have it separately
     int         fpu_stack;
+	x87control_t cw;
+    uint16_t    dummy_cw;   // align...
     mmxcontrol_t mxcsr;
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence