From 8180cb321818abcec2ffe41d908c3095380e79b1 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Tue, 29 Jul 2025 23:28:34 +0800 Subject: [RV64][LA64] Added partial FLUSHTO0 support (#2855) * [RV64][LA64] Removed obselete TODOs * more * more * more * more --- src/dynarec/la64/dynarec_la64_0f.c | 36 +++++++++++++++++++++- src/dynarec/la64/dynarec_la64_avx_0f.c | 39 ++++++++++++++++++++---- src/dynarec/rv64/dynarec_rv64_0f.c | 55 +++++++++++++++++++++++++++++++++- src/emu/x64emu.c | 2 ++ 4 files changed, 125 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 81eac050..c52eb714 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -1506,13 +1506,47 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr)); if (BOX64ENV(sse_flushto0)) { - // TODO + /* LA <-> x86 + 16/24 <-> 5 inexact + 17/25 <-> 4 underflow + 18/26 <-> 3 overflow + 19/27 <-> 2 divide by zero + x <-> 1 denormal + 20/28 <-> 0 invalid operation + */ + // Doing x86 -> LA here, ignore denormal + XOR(x4, x4, x4); + BSTRPICK_W(x3, ed, 5, 5); + BSTRINS_W(x4, x3, 16, 16); + BSTRPICK_W(x3, ed, 4, 4); + BSTRINS_W(x4, x3, 17, 17); + BSTRPICK_W(x3, ed, 3, 3); + BSTRINS_W(x4, x3, 18, 18); + BSTRPICK_W(x3, ed, 2, 2); + BSTRINS_W(x4, x3, 19, 19); + BSTRPICK_W(x3, ed, 0, 0); + BSTRINS_W(x4, x3, 20, 20); + MOVGR2FCSR(FCSR2, x4); } break; case 3: INST_NAME("STMXCSR Md"); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr)); + if (BOX64ENV(sse_flushto0)) { + MOVFCSR2GR(x5, FCSR2); + // Doing LA -> x86 here, ignore denormal + BSTRPICK_W(x3, x5, 16, 16); + BSTRINS_W(x4, x3, 5, 5); + BSTRPICK_W(x3, x5, 17, 17); + BSTRINS_W(x4, x3, 4, 4); + BSTRPICK_W(x3, x5, 18, 18); + BSTRINS_W(x4, x3, 3, 3); + BSTRPICK_W(x3, x5, 19, 19); + BSTRINS_W(x4, x3, 2, 2); + 
BSTRPICK_W(x3, x5, 20, 20); + BSTRINS_W(x4, x3, 0, 0); + } ST_W(x4, wback, fixedaddress); break; case 4: diff --git a/src/dynarec/la64/dynarec_la64_avx_0f.c b/src/dynarec/la64/dynarec_la64_avx_0f.c index c0302d45..cb1921a5 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f.c @@ -454,19 +454,48 @@ uintptr_t dynarec64_AVX_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, in GETED(0); ST_W(ed, xEmu, offsetof(x64emu_t, mxcsr)); if (BOX64ENV(sse_flushto0)) { - // sync with fpsr, with mask from mxcsr - // TODO + /* LA <-> x86 + 16/24 <-> 5 inexact + 17/25 <-> 4 underflow + 18/26 <-> 3 overflow + 19/27 <-> 2 divide by zero + x <-> 1 denormal + 20/28 <-> 0 invalid operation + */ + // Doing x86 -> LA here, ignore denormal + XOR(x4, x4, x4); + BSTRPICK_W(x3, ed, 5, 5); + BSTRINS_W(x4, x3, 16, 16); + BSTRPICK_W(x3, ed, 4, 4); + BSTRINS_W(x4, x3, 17, 17); + BSTRPICK_W(x3, ed, 3, 3); + BSTRINS_W(x4, x3, 18, 18); + BSTRPICK_W(x3, ed, 2, 2); + BSTRINS_W(x4, x3, 19, 19); + BSTRPICK_W(x3, ed, 0, 0); + BSTRINS_W(x4, x3, 20, 20); + MOVGR2FCSR(FCSR2, x4); } break; case 3: INST_NAME("VSTMXCSR Md"); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); LD_WU(x4, xEmu, offsetof(x64emu_t, mxcsr)); - ST_W(x4, wback, fixedaddress); if (BOX64ENV(sse_flushto0)) { - // sync with fpsr, with mask from mxcsr - // TODO + MOVFCSR2GR(x5, FCSR2); + // Doing LA -> x86 here, ignore denormal + BSTRPICK_W(x3, x5, 16, 16); + BSTRINS_W(x4, x3, 5, 5); + BSTRPICK_W(x3, x5, 17, 17); + BSTRINS_W(x4, x3, 4, 4); + BSTRPICK_W(x3, x5, 18, 18); + BSTRINS_W(x4, x3, 3, 3); + BSTRPICK_W(x3, x5, 19, 19); + BSTRINS_W(x4, x3, 2, 2); + BSTRPICK_W(x3, x5, 20, 20); + BSTRINS_W(x4, x3, 0, 0); } + ST_W(x4, wback, fixedaddress); break; default: DEFAULT; diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index d1745b7d..1ced055c 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ 
b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -1978,13 +1978,66 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETED(0); SW(ed, xEmu, offsetof(x64emu_t, mxcsr)); if (BOX64ENV(sse_flushto0)) { - // TODO: applyFlushTo0 also needs to add RISC-V support. + /* RV <-> x86 + 0 <-> 5 inexact + 1 <-> 4 underflow + 2 <-> 3 overflow + 3 <-> 2 divide by zero + x <-> 1 denormal + 4 <-> 0 invalid operation + */ + // Doing x86 -> RV here, 543210 => 0123x4, ignore denormal + // x5 = (ed & 0b1) << 4 + SLLIW(x5, ed, 4); + ANDI(x5, x5, 16); + // x3 = x5 | ((ed & 0b100) << 1); + SLLIW(x3, ed, 1); + ANDI(x3, x3, 8); + OR(x3, x3, x5); + // x3 = x3 | (ed & 0b1000) >> 1; + SRLIW(x4, ed, 1); + ANDI(x4, x4, 4); + OR(x3, x3, x4); + // x3 = x3 | (ed & 0b10000) >> 3; + SRLIW(x5, ed, 3); + ANDI(x5, x5, 2); + OR(x3, x3, x5); + // x3 = x3 | (ed & 0b100000) >> 5; + SRLIW(x5, ed, 5); + ANDI(x5, x5, 1); + OR(x3, x3, x5); + CSRRW(xZR, x3, /* fflags */ 0x001); } break; case 3: INST_NAME("STMXCSR Md"); addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0); LWU(x4, xEmu, offsetof(x64emu_t, mxcsr)); + if (BOX64ENV(sse_flushto0)) { + // Doing RV -> x86, 43210 => 02345, ignore denormal + ANDI(x4, x4, 0xfc0); + CSRRS(x3, xZR, /* fflags */ 0x001); + // x4 = x4 | (x3 & 0b1) << 5; + SLLIW(x5, x3, 5); + ANDI(x5, x5, 32); + OR(x4, x4, x5); + // x4 = x4 | (x3 & 0b10) << 3; + SLLIW(x6, x3, 3); + ANDI(x6, x6, 16); + OR(x4, x4, x6); + // x4 = x4 | (x3 & 0b100) << 1; + SLLIW(x6, x3, 1); + ANDI(x6, x6, 8); + OR(x4, x4, x6); + // x4 = x4 | (x3 & 0b1000) >> 1; + SRLIW(x5, x3, 1); + ANDI(x5, x5, 4); + OR(x4, x4, x5); + // x4 = x4 | (x3 & 0b10000) >> 4; + SRLIW(x5, x3, 4); + ANDI(x5, x5, 2); + OR(x4, x4, x5); + } SW(x4, wback, fixedaddress); break; case 4: diff --git a/src/emu/x64emu.c b/src/emu/x64emu.c index 9ce8c180..7627d4bb 100644 --- a/src/emu/x64emu.c +++ b/src/emu/x64emu.c @@ -607,6 +607,8 @@ void applyFlushTo0(x64emu_t* emu) #else 
__builtin_aarch64_set_fpcr(fpcr); #endif + #else + // This does not apply to RISC-V and LoongArch, as they don't have flush to zero #endif } -- cgit 1.4.1