diff options
| author | Yang Liu <liuyang22@iscas.ac.cn> | 2025-08-04 19:52:28 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-04 13:52:28 +0200 |
| commit | f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7 (patch) | |
| tree | 8904090398f2495cf48576fdbd8030b7d6cf6064 /src | |
| parent | 7b0ecf9f6ce017f63d503d41fd6c1bd1a7995af2 (diff) | |
| download | box64-f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7.tar.gz box64-f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7.zip | |
[LA64_DYNAREC] Added and optimized more fastround=0 cases (#2890)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 16 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_f30f.c | 31 |
2 files changed, 32 insertions, 15 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 596d26c5..9befaa52 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -2410,8 +2410,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEX(v1, 0, 0); GETGX_empty(v0); - // TODO: fastround - VFTINTRZ_W_D(v0, v1, v1); + if (!BOX64ENV(dynarec_fastround)) { + d0 = fpu_get_scratch(dyn); + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + VFTINTRZ_W_D(d0, v1, v1); + VLDI(q0, 0b1001110000000); // broadcast 32bit 0x80000000 to all + LU52I_D(x5, xZR, 0x41e); + VREPLGR2VR_D(q1, x5); + VFCMP_D(q1, q1, v1, cULE); + VSHUF4I_W(q1, q1, 0b00001000); + VBITSEL_V(v0, d0, q0, q1); + } else { + VFTINTRZ_W_D(v0, v1, v1); + } VINSGR2VR_D(v0, xZR, 1); break; case 0xE7: diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index a09f4b2d..6078198b 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -240,20 +240,25 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int nextop = F8; GETEX(v1, 0, 0); GETGX_empty(v0); - VFTINTRZ_W_S(v0, v1); - if (!BOX64ENV(dynarec_fastround)) { + if (BOX64ENV(dynarec_fastround)) { + VFTINTRZ_W_S(v0, v1); + } else { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VFTINTRZ_W_S(v0, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + BEQZ_MARK(x5); // no fp exception, work done, fast path. + q0 = fpu_get_scratch(dyn); - q1 = fpu_get_scratch(dyn); - d1 = fpu_get_scratch(dyn); - VFCMP_S(q0, v1, v1, cEQ); - VLDI(q1, 0b1001110000000); // broadcast 0x80000000 - VAND_V(v0, q0, v0); - VANDN_V(d1, q0, q1); - VOR_V(v0, v0, d1); - VSUBI_WU(d1, q1, 1); - VSEQ_W(q0, v0, d1); - VSRLI_W(q0, q0, 31); - VADD_W(v0, v0, q0); + q1 = fpu_get_scratch(dyn); // mask + d0 = fpu_get_scratch(dyn); + VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all + VLDI(d0, (0b10011 << 8) | 0x4f); + VFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark + VBITSEL_V(v0, v0, q0, q1); + + MARK; } break; case 0x5C: |