diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 53 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f_vector.c | 2 | ||||
| -rw-r--r-- | src/emu/x64run0f.c | 10 |
4 files changed, 60 insertions, 17 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 5dc1b5ba..cb815e9a 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -782,7 +782,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni nextop = F8; GETEX(q0, 0, 0); GETGX_empty(v0); - VFSQRT_S(v0, q0); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMP_S(d0, q0, q0, cEQ); + VFSQRT_S(v0, q0); + VFCMP_S(d1, v0, v0, cEQ); + VANDN_V(d1, d1, d0); + VSLLI_W(d1, d1, 31); + VOR_V(v0, v0, d1); + } else + VFSQRT_S(v0, q0); break; case 0x52: INST_NAME("RSQRTPS Gx, Ex"); diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index e9de73cf..2e28e15a 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -973,10 +973,24 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni GETGX(); GETEX(x2, 0, 12); d0 = fpu_get_scratch(dyn); + s1 = fpu_get_scratch(dyn); // 1.0f + LUI(x3, 0x3f800); + FMVWX(s1, x3); // 1.0f for (int i = 0; i < 4; ++i) { FLW(d0, wback, fixedaddress + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d0, d0); + BNEZ(x3, 4 + 2 * 4); // isnan(d0)? copy it + FSW(d0, gback, gdoffset + i * 4); + J(4 + 5 * 4); // continue + } FSQRTS(d0, d0); - FSW(d0, gback, gdoffset + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d0, d0); + BNEZ(x3, 4 + 4); // isnan(d0)? negate it + FNEGS(d0, d0); + } + FSW(d0, gback, gdoffset + i * 4); } break; case 0x52: @@ -987,28 +1001,29 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni s0 = fpu_get_scratch(dyn); s1 = fpu_get_scratch(dyn); // 1.0f v0 = fpu_get_scratch(dyn); // 0.0f - // do accurate computation, because riscv doesn't have rsqrt - MOV32w(x3, 1); - FCVTSW(s1, x3, RD_DYN); + LUI(x3, 0x3f800); + FMVWX(s1, x3); // 1.0f if (!BOX64ENV(dynarec_fastnan)) { FCVTSW(v0, xZR, RD_DYN); } for (int i = 0; i < 4; ++i) { FLW(s0, wback, fixedaddress + i * 4); if (!BOX64ENV(dynarec_fastnan)) { - FLES(x3, v0, s0); // s0 >= 0.0f? - BNEZ(x3, 6 * 4); - FEQS(x3, s0, s0); // isnan(s0)? - BEQZ(x3, 2 * 4); - // s0 is negative, so generate a NaN - FDIVS(s0, s1, v0); - // s0 is a NaN, just copy it + FLTS(x3, v0, s0); // s0 > 0.0f? + BNEZ(x3, 4 + 5 * 4); + FEQS(x3, v0, s0); // s0 == 0.0f? + BEQZ(x3, 4 + 3 * 4); + FDIVS(s0, s1, v0); // generate an inf FSW(s0, gback, gdoffset + i * 4); - J(4 * 4); - // do regular computation + J(4 + 6 * 4); // continue } FSQRTS(s0, s0); FDIVS(s0, s1, s0); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, s0, s0); + BNEZ(x3, 4 + 4); // isnan(s0)? negate it + FNEGS(s0, s0); + } FSW(s0, gback, gdoffset + i * 4); } break; @@ -1023,7 +1038,18 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni FMVWX(d0, x3); // 1.0f for (int i = 0; i < 4; ++i) { FLW(d1, wback, fixedaddress + 4 * i); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d1, d1); + BNEZ(x3, 4 + 2 * 4); // isnan(d1)? copy it + FSW(d1, gback, gdoffset + i * 4); + J(4 + 5 * 4); // continue + } FDIVS(d1, d0, d1); + if (!BOX64ENV(dynarec_fastnan)) { + FEQS(x3, d1, d1); + BNEZ(x3, 4 + 4); // isnan(d1)? negate it + FNEGS(d1, d1); + } FSW(d1, gback, gdoffset + 4 * i); } break; @@ -1057,7 +1083,6 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni case 0x57: INST_NAME("XORPS Gx, Ex"); nextop = F8; - // TODO: it might be possible to check if SS or SD are used and not purge them to optimize a bit GETGX(); if (MODREG && gd == (nextop & 7) + (rex.b << 3)) { // just zero dest diff --git a/src/dynarec/rv64/dynarec_rv64_0f_vector.c b/src/dynarec/rv64/dynarec_rv64_0f_vector.c index c3e7dfdd..5eb36493 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f_vector.c +++ b/src/dynarec/rv64/dynarec_rv64_0f_vector.c @@ -520,6 +520,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, } break; case 0x51: + if (!BOX64ENV(dynarec_fastround)) return 0; INST_NAME("SQRTPS Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); @@ -540,6 +541,7 @@ uintptr_t dynarec64_0F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, VFRDIV_VF(v1, v1, v0, VECTOR_UNMASKED); break; case 0x53: + if (!BOX64ENV(dynarec_fastround)) return 0; INST_NAME("RCPPS Gx, Ex"); nextop = F8; SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1); diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 046861da..ee66c0ce 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -739,7 +739,10 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) GETEX(0); GETGX; for(int i=0; i<4; ++i) - GX->f[i] = sqrtf(EX->f[i]); + if (isnan(EX->f[i])) + GX->f[i] = EX->f[i]; + else + GX->f[i] = (EX->f[i] < 0) ? (-NAN) : sqrtf(EX->f[i]); break; case 0x52: /* RSQRTPS Gx, Ex */ nextop = F8; @@ -763,7 +766,10 @@ uintptr_t Run0F(x64emu_t *emu, rex_t rex, uintptr_t addr, int *step) GETEX(0); GETGX; for(int i=0; i<4; ++i) - GX->f[i] = 1.0f/EX->f[i]; + if (isnan(EX->f[i])) + GX->f[i] = EX->f[i]; + else + GX->f[i] = 1.0f / EX->f[i]; break; case 0x54: /* ANDPS Gx, Ex */ nextop = F8; |