about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-04-01 13:17:09 +0200
committer ptitSeb <sebastien.chev@gmail.com> 2023-04-01 13:18:34 +0200
commit 5661de58a8373801b0ecbd2dab016678cbb2a964 (patch)
tree 4f69500c2830a7a9515aa4012c4979ec76f01a8e /src
parent a1d5cb0e6961c68499113345c02bcff42a0e9fab (diff)
download box64-5661de58a8373801b0ecbd2dab016678cbb2a964.tar.gz
box64-5661de58a8373801b0ecbd2dab016678cbb2a964.zip
[ARM64_DYNAREC] Fixed and improved 66 0F 5A/5B opcodes
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64/arm64_emitter.h 6
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_660f.c 37
2 files changed, 38 insertions, 5 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 7d6b76cd..c84e4f86 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1266,6 +1266,12 @@
 #define FCVT_D_S(Dd, Sn)            EMIT(FCVT_precision(0b00, 0b01, Sn, Dd))
 #define FCVT_S_D(Sd, Dn)            EMIT(FCVT_precision(0b01, 0b00, Dn, Sd))
 
+#define FCVTN_vector(Q, sz, Rn, Rd)   ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// Convert Vn from 2*double to 2*float in the lower half of Vd and clear the upper half, using FPCR rounding
+#define FCVTN(Vd, Vn)               EMIT(FCVTN_vector(0, 1, Vn, Vd))
+// Convert Vn from 2*double to 2*float in the upper half of Vd, using FPCR rounding
+#define FCVTN2(Vd, Vn)              EMIT(FCVTN_vector(1, 1, Vn, Vd))
+
 #define FCVTXN_vector(Q, sz, Rn, Rd)   ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half
 #define FCVTXN(Vd, Vn)              EMIT(FCVTXN_vector(0, 1, Vn, Vd))
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index eea7519c..5b21e174 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1058,17 +1058,44 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETEX(v1, 0, 0);

             GETGX_empty(v0);

-            FCVTXN(v0, v1);

+            if(box64_dynarec_fastround) {

+                FCVTXN(v0, v1);

+            } else {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                FCVTN(v0, v1);

+                x87_restoreround(dyn, ninst, u8);

+            }

             break;

         case 0x5B:

             INST_NAME("CVTPS2DQ Gx, Ex");

             nextop = F8;

             GETEX(v1, 0, 0);

             GETGX_empty(v0);

-            u8 = sse_setround(dyn, ninst, x1, x2, x3);

-            VFRINTISQ(v0, v1);

-            x87_restoreround(dyn, ninst, u8);

-            VFCVTZSQS(v0, v0);

+            if(box64_dynarec_fastround) {

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                VFRINTISQ(q0, v1);

+                x87_restoreround(dyn, ninst, u8);

+                VFCVTZSQS(q0, q0);

+            } else {

+                MRS_fpsr(x5);

+                BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                MSR_fpsr(x5);

+                u8 = sse_setround(dyn, ninst, x1, x2, x3);

+                MOV32w(x4, 0x80000000);

+                d0 = fpu_get_scratch(dyn);

+                for(int i=0; i<4; ++i) {

+                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit

+                    MSR_fpsr(x5);

+                    VMOVeS(d0, 0, v1, i);

+                    FRINTIS(d0, d0);

+                    VFCVTZSs(d0, d0);

+                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit

+                    TBZ(x5, FPSR_IOC, 4+4);

+                    VMOVQSfrom(d0, 0, x4);

+                    VMOVeS(v0, i, d0, 0);

+                }

+                x87_restoreround(dyn, ninst, u8);

+            }

             break;

         case 0x5C:

             INST_NAME("SUBPD Gx, Ex");