[DYNAREC] Added F2 0F 2D opcodes, and use a simpler version of the conversions (but complex one is still available under a define in dynarec_arm64_helper.h)

author: ptitSeb <sebastien.chev@gmail.com> 2021-03-24 16:00:17 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2021-03-24 16:00:17 +0100
commit: 571de7f07f026aa955f7ffb926124e87c737aba1 (patch)
tree: 2c13b37ba30f815536928e2d01acff536093d03a
parent: b055c2be65ef407841f6988d44b69ffc0bfe281c (diff)
download: box64-571de7f07f026aa955f7ffb926124e87c737aba1.tar.gz
box64-571de7f07f026aa955f7ffb926124e87c737aba1.zip
4 files changed, 48 insertions, 0 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 23916e66..9bc3190c 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -866,28 +866,38 @@
 // Floating-point Convert to Signed integer, rounding toward Minus infinity
 #define FCVTMSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b10, 0b100, Sn, Wd))
 #define FCVTMSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b10, 0b100, Sn, Xd))
+#define FCVTMSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b100, Sn, Xd))
 #define FCVTMSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b10, 0b100, Dn, Wd))
 #define FCVTMSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b10, 0b100, Dn, Xd))
+#define FCVTMSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b100, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding toward Minus infinity
 #define FCVTMUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b10, 0b101, Sn, Wd))
 #define FCVTMUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b10, 0b101, Sn, Xd))
+#define FCVTMUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b101, Sn, Xd))
 #define FCVTMUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b10, 0b101, Dn, Wd))
 #define FCVTMUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b10, 0b101, Dn, Xd))
+#define FCVTMUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b101, Dn, Xd))
 // Floating-point Convert to Signed integer, rounding to nearest with ties to even
 #define FCVTNSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b00, 0b000, Sn, Wd))
 #define FCVTNSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b00, 0b000, Sn, Xd))
+#define FCVTNSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b000, Sn, Xd))
 #define FCVTNSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b00, 0b000, Dn, Wd))
 #define FCVTNSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b00, 0b000, Dn, Xd))
+#define FCVTNSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b000, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding to nearest with ties to even
 #define FCVTNUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b00, 0b001, Sn, Wd))
 #define FCVTNUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b00, 0b001, Sn, Xd))
+#define FCVTNUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b001, Sn, Xd))
 #define FCVTNUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b00, 0b001, Dn, Wd))
 #define FCVTNUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b00, 0b001, Dn, Xd))
+#define FCVTNUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b001, Dn, Xd))
 // Floating-point Convert to Signed integer, rounding toward Plus infinity
 #define FCVTPSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b01, 0b000, Sn, Wd))
 #define FCVTPSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b01, 0b000, Sn, Xd))
+#define FCVTPSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b01, 0b000, Sn, Xd))
 #define FCVTPSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b01, 0b000, Dn, Wd))
 #define FCVTPSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b01, 0b000, Dn, Xd))
+#define FCVTPSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b01, 0b000, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding toward Plus infinity
 #define FCVTPUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b01, 0b001, Sn, Wd))
 #define FCVTPUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b01, 0b001, Sn, Xd))
@@ -903,8 +913,10 @@
 // Floating-point Convert to Unsigned integer, rounding toward Zero
 #define FCVTZUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b11, 0b001, Sn, Wd))
 #define FCVTZUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b11, 0b001, Sn, Xd))
+#define FCVTZUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b001, Sn, Xd))
 #define FCVTZUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b11, 0b001, Dn, Wd))
 #define FCVTZUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b11, 0b001, Dn, Xd))
+#define FCVTZUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b001, Dn, Xd))
 
 #define FCVT_vector_scalar(U, o2, sz, o1, Rn, Rd)   (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 7c23baed..b1e64b39 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -203,6 +203,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETEX(v1, 0);
             GETGX_empty(v0);
+            #ifdef PRECISE_CVT
             LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr));
             UBFXx(x1, x1, 13, 2);   // extract round requested
             LSLx_REG(x1, x1, 3);
@@ -218,6 +219,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             VFCVTPSQS(v0, v1);  // 2: Toward +inf
             B_NEXT_nocond;
             VFCVTZSQS(v0, v1);  // 3: Toward 0
+            #else
+            VFCVTNSQS(v0, v1);
+            #endif
             break;
 
         case 0x60:
diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index c153f907..e8200877 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -48,6 +48,10 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int d0, d1;
     int fixedaddress;
 
+#ifdef PRECISE_CVT
+    int j32;
+    MAYUSE(j32);
+#endif
     MAYUSE(d0);
     MAYUSE(d1);
     MAYUSE(q0);
@@ -107,6 +111,31 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(q0, 0);
             FCVTZSxwD(gd, q0);
             break;
+        case 0x2D:
+            INST_NAME("CVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEX(q0, 0);
+            #ifdef PRECISE_CVT
+            LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr));
+            UBFXx(x1, x1, 13, 2);   // extract round requested
+            LSLx_REG(x1, x1, 3);
+            ADDx_U12(x1, x1, 8);    // add the actual add+jump opcodes
+            // Construct a "switch case", with each case 2 instructions, so 8 bytes
+            BL(+4); // Branch with Link to next, so LR gets next PC address
+            ADDx_REG(xLR, xLR, x1);
+            B(xLR); // could use RET, but it's not really one
+            FCVTNSxwD(gd, q0);  // 0: Nearest (even)
+            B_NEXT_nocond;
+            FCVTMSxwD(gd, q0);  // 1: Toward -inf
+            B_NEXT_nocond;
+            FCVTPSxwD(gd, q0);  // 2: Toward +inf
+            B_NEXT_nocond;
+            FCVTZSxwD(gd, q0);  // 3: Toward 0
+            #else
+            FCVTNSxwD(gd, q0);
+            #endif
+            break;
 
 
         case 0x51:
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index f61f9d23..53f8da67 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -1,6 +1,9 @@
 #ifndef __DYNAREC_ARM64_HELPER_H__
 #define __DYNAREC_ARM64_HELPER_H__
 
+// Define PRECISE_CVT to get Float->int conversions close to SSE (honoring the MXCSR rounding mode)
+//#define PRECISE_CVT
+
 #if STEP == 0
 #include "dynarec_arm64_pass0.h"
 #elif STEP == 1
author	ptitSeb <sebastien.chev@gmail.com>	2021-03-24 16:00:17 +0100
committer	ptitSeb <sebastien.chev@gmail.com>	2021-03-24 16:00:17 +0100
commit	571de7f07f026aa955f7ffb926124e87c737aba1 (patch)
tree	2c13b37ba30f815536928e2d01acff536093d03a
parent	b055c2be65ef407841f6988d44b69ffc0bfe281c (diff)
download	box64-571de7f07f026aa955f7ffb926124e87c737aba1.tar.gz box64-571de7f07f026aa955f7ffb926124e87c737aba1.zip