about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-01-05 14:22:13 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-01-05 14:22:13 +0100
commit 077ba65f7e1ada261999197ea623488675df8cfa (patch)
tree 752e53fa514e923d59a147a35f87943d722d6a06 /src
parent 6f6a42642418ea34a77a0648a75957a28b733a1e (diff)
download box64-077ba65f7e1ada261999197ea623488675df8cfa.tar.gz
download box64-077ba65f7e1ada261999197ea623488675df8cfa.zip
[ARM64_DYNAREC] More FRINTTS use
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/arm64_emitter.h 18
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 74
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_df.c 2
3 files changed, 61 insertions, 33 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index cbfa8371..51dc499a 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -2171,6 +2171,24 @@
 #define FRINT64XS(Sd, Sn)           EMIT(FRINTxx_scalar(0b00, 0b11, Sn, Sd))
 #define FRINT64XD(Dd, Dn)           EMIT(FRINTxx_scalar(0b01, 0b11, Dn, Dd))
 
+#define FRINTxx_vector(Q, U, sz, op, Rn, Rd)    ((Q)<<30 | (U)<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b1111<<13 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define VFRINT32ZS(Vd, Vn)          EMIT(FRINTxx_vector(0, 0, 0, 0, Vn, Vd))
+#define VFRINT32ZSQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 0, 0, 0, Vn, Vd))
+#define VFRINT32XS(Vd, Vn)          EMIT(FRINTxx_vector(0, 1, 0, 0, Vn, Vd))
+#define VFRINT32XSQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 1, 0, 0, Vn, Vd))
+#define VFRINT32ZD(Vd, Vn)          EMIT(FRINTxx_vector(0, 0, 1, 0, Vn, Vd))
+#define VFRINT32ZDQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 0, 1, 0, Vn, Vd))
+#define VFRINT32XD(Vd, Vn)          EMIT(FRINTxx_vector(0, 1, 1, 0, Vn, Vd))
+#define VFRINT32XDQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 1, 1, 0, Vn, Vd))
+#define VFRINT64ZS(Vd, Vn)          EMIT(FRINTxx_vector(0, 0, 0, 1, Vn, Vd))
+#define VFRINT64ZSQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 0, 0, 1, Vn, Vd))
+#define VFRINT64XS(Vd, Vn)          EMIT(FRINTxx_vector(0, 1, 0, 1, Vn, Vd))
+#define VFRINT64XSQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 1, 0, 1, Vn, Vd))
+#define VFRINT64ZD(Vd, Vn)          EMIT(FRINTxx_vector(0, 0, 1, 1, Vn, Vd))
+#define VFRINT64ZDQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 0, 1, 1, Vn, Vd))
+#define VFRINT64XD(Vd, Vn)          EMIT(FRINTxx_vector(0, 1, 1, 1, Vn, Vd))
+#define VFRINT64XDQ(Vd, Vn)         EMIT(FRINTxx_vector(1, 1, 1, 1, Vn, Vd))
+
 // CRC32 extension
 #define CRC32C_gen(sf, Rm, sz, Rn, Rd)  ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b010<<13 | 1<<12 | (sz)<<10 | (Rn)<<5 | (Rd))
 #define CRC32CB(Wd, Wn, Wm)         EMIT(CRC32C_gen(0, Wm, 0b00, Wn, Wd))
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 3ef67695..8a9aeb6c 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -336,24 +336,29 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if (box64_dynarec_fastround) {

                 VFCVTZSS(q0, v1);

             } else {

-                MRS_fpsr(x5);

-                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                MSR_fpsr(x5);

-                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

-                d0 = fpu_get_scratch(dyn);

-                for (int i=0; i<2; ++i) {

+                if(arm64_frintts) {

+                    VFRINT32ZS(q0, v1);

+                    VFCVTZSS(q0, q0);

+                } else {

+                    MRS_fpsr(x5);

                     BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                    if (i) {

-                        VMOVeS(d0, 0, v1, i);

-                        FRINTZS(d0, d0);

-                    } else {

-                        FRINTZS(d0, v1);

+                    MSR_fpsr(x5);

+                    ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                    d0 = fpu_get_scratch(dyn);

+                    for (int i=0; i<2; ++i) {

+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                        if (i) {

+                            VMOVeS(d0, 0, v1, i);

+                            FRINTZS(d0, d0);

+                        } else {

+                            FRINTZS(d0, v1);

+                        }

+                        FCVTZSwS(x1, d0);

+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                        TBZ(x5, FPSR_IOC, 4+4);

+                        MOVw_REG(x1, x2);

+                        VMOVQSfrom(q0, i, x1);

                     }

-                    FCVTZSwS(x1, d0);

-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

-                    TBZ(x5, FPSR_IOC, 4+4);

-                    MOVw_REG(x1, x2);

-                    VMOVQSfrom(q0, i, x1);

                 }

             }

             break;

@@ -369,24 +374,29 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VFCVTZSS(q0, q0);

             } else {

                 u8 = sse_setround(dyn, ninst, x1, x2, x3);

-                MRS_fpsr(x5);

-                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                MSR_fpsr(x5);

-                ORRw_mask(x2, xZR, 1, 0);    //0x80000000

-                d0 = fpu_get_scratch(dyn);

-                for (int i=0; i<2; ++i) {

+                if(arm64_frintts) {

+                    VFRINT32XS(q0, v1);

+                    VFCVTZSS(q0, q0);

+                } else {

+                    MRS_fpsr(x5);

                     BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

-                    if (i) {

-                        VMOVeS(d0, 0, v1, i);

-                        FRINTIS(d0, d0);

-                    } else {

-                        FRINTIS(d0, v1);

+                    MSR_fpsr(x5);

+                    ORRw_mask(x2, xZR, 1, 0);    //0x80000000

+                    d0 = fpu_get_scratch(dyn);

+                    for (int i=0; i<2; ++i) {

+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                        if (i) {

+                            VMOVeS(d0, 0, v1, i);

+                            FRINTIS(d0, d0);

+                        } else {

+                            FRINTIS(d0, v1);

+                        }

+                        FCVTZSwS(x1, d0);

+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                        TBZ(x5, FPSR_IOC, 4+4);

+                        MOVw_REG(x1, x2);

+                        VMOVQSfrom(q0, i, x1);

                     }

-                    FCVTZSwS(x1, d0);

-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

-                    TBZ(x5, FPSR_IOC, 4+4);

-                    MOVw_REG(x1, x2);

-                    VMOVQSfrom(q0, i, x1);

                 }

                 x87_restoreround(dyn, ninst, u8);

             }

diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c
index b81c4128..5853fd39 100644
--- a/src/dynarec/arm64/dynarec_arm64_df.c
+++ b/src/dynarec/arm64/dynarec_arm64_df.c
@@ -176,7 +176,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     ed = x1;
                     s0 = fpu_get_scratch(dyn);
                     #if 0
-                    // this version needs ARM v8.5, //TODO: add detection of this extension to use it
+                    // this version needs ARM v8.5, and doesn't handle saturation for 32bits integer not fitting 16bits
                     FRINT32ZD(s0, v1);
                     // no saturation instruction on Arm, so using NEON
                     VFCVTZSd(s0, s0);