diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-09-01 17:39:21 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-01 11:39:21 +0200 |
| commit | e5556e90df9835ad2a777c42be50d943a2c5bcc5 (patch) | |
| tree | 8c51f99ed7570add4ffc3bb3636775b4eea6d8f7 /src/dynarec | |
| parent | 1edd0eb47d207b21af0586db36abfc3625d1a849 (diff) | |
| download | box64-e5556e90df9835ad2a777c42be50d943a2c5bcc5.tar.gz box64-e5556e90df9835ad2a777c42be50d943a2c5bcc5.zip | |
[INTERP][DYNAREC] Aligned !fastnan handling of 0F 51/52 opcodes (#2989)
* [INTERP] Added !fastnan handling to some 0F opcodes
* [RV64_DYNAREC] Fixed/refined !fastnan handling of some 0F opcodes
* la64
* more fixes
Diffstat (limited to 'src/dynarec')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 12 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 53 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 2 |
3 files changed, 52 insertions, 15 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 5dc1b5ba..cb815e9a 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -782,7 +782,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETEX(q0, 0, 0); GETGX_empty(v0); - VFSQRT_S(v0, q0); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMP_S(d0, q0, q0, cEQ); + VFSQRT_S(v0, q0); + VFCMP_S(d1, v0, v0, cEQ); + VANDN_V(d1, d1, d0); + VSLLI_W(d1, d1, 31); + VOR_V(v0, v0, d1); + } else + VFSQRT_S(v0, q0); break; case 0x52: INST_NAME("RSQRTPS Gx, Ex"); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index e9de73cf..2e28e15a 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -973,10 +973,24 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(); GETEX(x2, 0, 12); d0 = fpu_get_scratch(dyn); + s1 = fpu_get_scratch(dyn); // 1.0f + LUI(x3, 0x3f800); + FMVWX(s1, x3); // 1.0f for (int i = 0; i < 4; ++i) { FLW(d0, wback, fixedaddress + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d0, d0); + BNEZ(x3, 4 + 2 * 4); // isnan(d0)? copy it + FSW(d0, gback, gdoffset + i * 4); + J(4 + 5 * 4); // continue + } FSQRTS(d0, d0); - FSW(d0, gback, gdoffset + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d0, d0); + BNEZ(x3, 4 + 4); // isnan(d0)? 
negate it + FNEGS(d0, d0); + } + FSW(d0, gback, gdoffset + i * 4); } break; case 0x52: @@ -987,28 +1001,29 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); // 1.0f v0 = fpu_get_scratch(dyn); // 0.0f - // do accurate computation, because riscv doesn't have rsqrt - MOV32w(x3, 1); - FCVTSW(s1, x3, RD_DYN); + LUI(x3, 0x3f800); + FMVWX(s1, x3); // 1.0f if (!BOX64ENV(dynarec_fastnan)) { FCVTSW(v0, xZR, RD_DYN); } for (int i = 0; i < 4; ++i) { FLW(s0, wback, fixedaddress + i * 4); if (!BOX64ENV(dynarec_fastnan)) { - FLES(x3, v0, s0); // s0 >= 0.0f? - BNEZ(x3, 6 * 4); - FEQS(x3, s0, s0); // isnan(s0)? - BEQZ(x3, 2 * 4); - // s0 is negative, so generate a NaN - FDIVS(s0, s1, v0); - // s0 is a NaN, just copy it + FLTS(x3, v0, s0); // s0 > 0.0f? + BNEZ(x3, 4 + 5 * 4); + FEQS(x3, v0, s0); // s0 == 0.0f? + BEQZ(x3, 4 + 3 * 4); + FDIVS(s0, s1, v0); // generate an inf FSW(s0, gback, gdoffset + i * 4); - J(4 * 4); - // do regular computation + J(4 + 6 * 4); // continue } FSQRTS(s0, s0); FDIVS(s0, s1, s0); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, s0, s0); + BNEZ(x3, 4 + 4); // isnan(s0)? negate it + FNEGS(s0, s0); + } FSW(s0, gback, gdoffset + i * 4); } break; @@ -1023,7 +1038,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMVWX(d0, x3); // 1.0f for (int i = 0; i < 4; ++i) { FLW(d1, wback, fixedaddress + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d1, d1); + BNEZ(x3, 4 + 2 * 4); // isnan(d1)? copy it + FSW(d1, gback, gdoffset + i * 4); + J(4 + 5 * 4); // continue + } FDIVS(d1, d0, d1); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d1, d1); + BNEZ(x3, 4 + 4); // isnan(d1)? 
negate it + FNEGS(d1, d1); + } FSW(d1, gback, gdoffset + 4 * i); } break; @@ -1057,7 +1083,6 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x57: INST_NAME("XORPS Gx, Ex"); nextop = F8; - // TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit GETGX(); if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // just zero dest diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index c3e7dfdd..5eb36493 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -520,6 +520,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } break; case 0x51: + if (!BOX64ENV(dynarec_fastround)) return 0; INST_NAME("SQRTPS Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); @@ -540,6 +541,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED); break; case 0x53: + if (!BOX64ENV(dynarec_fastround)) return 0; INST_NAME("RCPPS Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); |