From 8a1e4cdf306ff3f57a8603004e068549248db29b Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Fri, 27 Oct 2023 17:06:04 +0200
Subject: [ARM64_DYNAREC] Ported fprem/fprem1 from box86 (fix camera issues in FlatOut / FlatOut 2)

---
 src/dynarec/arm64/dynarec_arm64_d9.c | 46 ++++++++++++++++++++++++++++++++++++
 src/emu/x64emu_private.h | 3 ++-
 2 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 20563efd..cf76770d 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -236,11 +236,34 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xF5:
             INST_NAME("FPREM1");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
+            #if 0
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem1, -1);
             x87_unstackcount(dyn, ninst, x1, i1);
+            #else
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+            v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
+            s0 = fpu_get_scratch(dyn);
+            FDIVD(s0, v1, v2);
+            FRINTRRD(s0, s0, 0b00); // Nearest == TieToEven?
+            FCVTZSxD(x4, s0);
+            FMULD(s0, s0, v2);
+            FSUBD(v1, v1, s0);
+            LDRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            // set C2 = 0
+            BFCw(x1, 10, 1);
+            // set C1 = Q0
+            BFIw(x1, x4, 9, 1);
+            // set C3 = Q1
+            LSRx_IMM(x4, x4, 1);
+            BFIw(x1, x4, 14, 1);
+            // Set C0 = Q2
+            LSRx(x4, x4, 1);
+            BFIw(x1, x4, 8, 1);
+            STRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            #endif
             break;
         case 0xF6:
             INST_NAME("FDECSTP");
@@ -260,12 +283,35 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0xF8:
             INST_NAME("FPREM");
+            #if 0
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             i1 = x87_stackcount(dyn, ninst, x1);
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             CALL(native_fprem, -1);
             x87_unstackcount(dyn, ninst, x1, i1);
+            #else
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+            v2 = x87_get_st(dyn, ninst, x1, x2, 1, NEON_CACHE_ST_D);
+            s0 = fpu_get_scratch(dyn);
+            FDIVD(s0, v1, v2);
+            FRINTZD(s0, s0);
+            FCVTZSxD(x4, s0);
+            FMULD(s0, s0, v2);
+            FSUBD(v1, v1, s0);
+            LDRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            // set C2 = 0
+            BFCw(x1, 10, 1);
+            // set C1 = Q0
+            BFIw(x1, x4, 9, 1);
+            // set C3 = Q1
+            LSRx_IMM(x4, x4, 1);
+            BFIw(x1, x4, 14, 1);
+            // Set C0 = Q2
+            LSRx(x4, x4, 1);
+            BFIw(x1, x4, 8, 1);
+            STRw_U12(x1, xEmu, offsetof(x64emu_t, sw));
+            #endif
             break;
         case 0xF9:
             INST_NAME("FYL2XP1");
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index c5c4f4d9..aa6584a7 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -61,10 +61,11 @@ typedef struct x64emu_s {
     // fpu / mmx
     mmx87_regs_t x87[8];
     mmx87_regs_t mmx[8];
-    x87control_t cw;
     x87flags_t sw;
     uint32_t top; // top is part of sw, but it's faster to have it separately
     int fpu_stack;
+    x87control_t cw;
+    uint16_t dummy_cw; // align...
     mmxcontrol_t mxcsr;
     fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence
-- cgit 1.4.1