about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2025-05-28 17:25:23 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2025-05-28 17:25:23 +0200
commit 6697c7d2294d3a75ac930f713146327998ba7d32 (patch)
tree 7656f78b8e7bf6a8c67ccfca07175d5d5ec54083 /src
parent 6a41f33bf3a6a0e34e909dc18165b68b32608dac (diff)
download box64-6697c7d2294d3a75ac930f713146327998ba7d32.tar.gz
box64-6697c7d2294d3a75ac930f713146327998ba7d32.zip
[ARM64_DYNAREC] Optimized a bit CVTTPS2DQ on fastround=0 when frintts is not supported on the cpu
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f30f.c 19
1 files changed, 18 insertions, 1 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 33b1942a..b8a50063 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -326,9 +326,26 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     VFRINT32ZSQ(v0, v1);

                     VFCVTZSQS(v0, v0);

                 } else {

+                    // try to transform the 4 values first, then fall back to 1 at time if needed

                     MRS_fpsr(x5);

-                    ORRw_mask(x4, xZR, 1, 0);    //0x80000000

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    MSR_fpsr(x5);

                     d0 = fpu_get_scratch(dyn, ninst);

+                    if(v0!=v1) {

+                        VFCVTZSQS(v0, v1);

+                    } else {

+                        VFCVTZSQS(d0, v1);

+                    }

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    if(v0!=v1) {

+                        TBZ_NEXT(x5, FPSR_IOC);

+                    } else {

+                        TBNZ_MARK(x5, FPSR_IOC);

+                        VMOVQ(v0, d0);

+                        B_NEXT_nocond;

+                        MARK;

+                    }

+                    ORRw_mask(x4, xZR, 1, 0);    //0x80000000

                     for(int i=0; i<4; ++i) {

                         BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

                         MSR_fpsr(x5);