about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2025-08-04 19:52:28 +0800
committer GitHub <noreply@github.com> 2025-08-04 13:52:28 +0200
commit f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7 (patch)
tree 8904090398f2495cf48576fdbd8030b7d6cf6064 /src
parent 7b0ecf9f6ce017f63d503d41fd6c1bd1a7995af2 (diff)
download box64-f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7.tar.gz
box64-f4cd829c82d2106d84c9dfca45c56ebd0b89a5c7.zip
[LA64_DYNAREC] Added and optimized more fastround=0 cases (#2890)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 16
-rw-r--r-- src/dynarec/la64/dynarec_la64_f30f.c 31
2 files changed, 32 insertions, 15 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 596d26c5..9befaa52 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -2410,8 +2410,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
-            // TODO: fastround
-            VFTINTRZ_W_D(v0, v1, v1);
+            if (!BOX64ENV(dynarec_fastround)) {
+                d0 = fpu_get_scratch(dyn);
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                VFTINTRZ_W_D(d0, v1, v1);
+                VLDI(q0, 0b1001110000000); // broadcast 32bit 0x80000000 to all
+                LU52I_D(x5, xZR, 0x41e);
+                VREPLGR2VR_D(q1, x5);
+                VFCMP_D(q1, q1, v1, cULE);
+                VSHUF4I_W(q1, q1, 0b00001000);
+                VBITSEL_V(v0, d0, q0, q1);
+            } else {
+                VFTINTRZ_W_D(v0, v1, v1);
+            }
             VINSGR2VR_D(v0, xZR, 1);
             break;
         case 0xE7:
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index a09f4b2d..6078198b 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -240,20 +240,25 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
-            VFTINTRZ_W_S(v0, v1);
-            if (!BOX64ENV(dynarec_fastround)) {
+            if (BOX64ENV(dynarec_fastround)) {
+                VFTINTRZ_W_S(v0, v1);
+            } else {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                VFTINTRZ_W_S(v0, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                BEQZ_MARK(x5); // no fp exception, work done, fast path.
+
                 q0 = fpu_get_scratch(dyn);
-                q1 = fpu_get_scratch(dyn);
-                d1 = fpu_get_scratch(dyn);
-                VFCMP_S(q0, v1, v1, cEQ);
-                VLDI(q1, 0b1001110000000); // broadcast 0x80000000
-                VAND_V(v0, q0, v0);
-                VANDN_V(d1, q0, q1);
-                VOR_V(v0, v0, d1);
-                VSUBI_WU(d1, q1, 1);
-                VSEQ_W(q0, v0, d1);
-                VSRLI_W(q0, q0, 31);
-                VADD_W(v0, v0, q0);
+                q1 = fpu_get_scratch(dyn); // mask
+                d0 = fpu_get_scratch(dyn);
+                VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                VLDI(d0, (0b10011 << 8) | 0x4f);
+                VFCMP_S(q1, d0, v1, cULE); // get Nan,+overflow mark
+                VBITSEL_V(v0, v0, q0, q1);
+
+                MARK;
             }
             break;
         case 0x5C: