about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-03-24 16:00:17 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-03-24 16:00:17 +0100
commit571de7f07f026aa955f7ffb926124e87c737aba1 (patch)
tree2c13b37ba30f815536928e2d01acff536093d03a
parentb055c2be65ef407841f6988d44b69ffc0bfe281c (diff)
downloadbox64-571de7f07f026aa955f7ffb926124e87c737aba1.tar.gz
box64-571de7f07f026aa955f7ffb926124e87c737aba1.zip
[DYNAREC] Added F2 0F 2D opcodes, and use a simpler version of the conversions (but complex one is still available under a define in dynarec_arm64_helper.h)
-rwxr-xr-xsrc/dynarec/arm64_emitter.h12
-rwxr-xr-xsrc/dynarec/dynarec_arm64_660f.c4
-rwxr-xr-xsrc/dynarec/dynarec_arm64_f20f.c29
-rwxr-xr-xsrc/dynarec/dynarec_arm64_helper.h3
4 files changed, 48 insertions, 0 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 23916e66..9bc3190c 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -866,28 +866,38 @@
 // Floating-point Convert to Signed integer, rounding toward Minus infinity
 #define FCVTMSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b10, 0b100, Sn, Wd))
 #define FCVTMSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b10, 0b100, Sn, Xd))
+#define FCVTMSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b100, Sn, Xd))
 #define FCVTMSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b10, 0b100, Dn, Wd))
 #define FCVTMSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b10, 0b100, Dn, Xd))
+#define FCVTMSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b100, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding toward Minus infinity
 #define FCVTMUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b10, 0b101, Sn, Wd))
 #define FCVTMUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b10, 0b101, Sn, Xd))
+#define FCVTMUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b101, Sn, Xd))
 #define FCVTMUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b10, 0b101, Dn, Wd))
 #define FCVTMUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b10, 0b101, Dn, Xd))
+#define FCVTMUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b101, Dn, Xd))
 // Floating-point Convert to Signed integer, rounding to nearest with ties to even
 #define FCVTNSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b00, 0b000, Sn, Wd))
 #define FCVTNSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b00, 0b000, Sn, Xd))
+#define FCVTNSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b000, Sn, Xd))
 #define FCVTNSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b00, 0b000, Dn, Wd))
 #define FCVTNSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b00, 0b000, Dn, Xd))
+#define FCVTNSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b000, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding to nearest with ties to even
 #define FCVTNUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b00, 0b001, Sn, Wd))
 #define FCVTNUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b00, 0b001, Sn, Xd))
+#define FCVTNUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b001, Sn, Xd))
 #define FCVTNUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b00, 0b001, Dn, Wd))
 #define FCVTNUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b00, 0b001, Dn, Xd))
+#define FCVTNUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b001, Dn, Xd))
 // Floating-point Convert to Signed integer, rounding toward Plus infinity
 #define FCVTPSwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b01, 0b000, Sn, Wd))
 #define FCVTPSxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b01, 0b000, Sn, Xd))
+#define FCVTPSxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b01, 0b000, Sn, Xd))
 #define FCVTPSwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b01, 0b000, Dn, Wd))
 #define FCVTPSxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b01, 0b000, Dn, Xd))
+#define FCVTPSxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b01, 0b000, Dn, Xd))
 // Floating-point Convert to Unsigned integer, rounding toward Plus infinity
 #define FCVTPUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b01, 0b001, Sn, Wd))
 #define FCVTPUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b01, 0b001, Sn, Xd))
@@ -903,8 +913,10 @@
 // Floating-point Convert to Unsigned integer, rounding toward Zero
 #define FCVTZUwS(Wd, Sn)            EMIT(FCVT_scalar(0, 0b00, 0b11, 0b001, Sn, Wd))
 #define FCVTZUxS(Xd, Sn)            EMIT(FCVT_scalar(1, 0b00, 0b11, 0b001, Sn, Xd))
+#define FCVTZUxwS(Xd, Sn)           EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b001, Sn, Xd))
 #define FCVTZUwD(Wd, Dn)            EMIT(FCVT_scalar(0, 0b01, 0b11, 0b001, Dn, Wd))
 #define FCVTZUxD(Xd, Dn)            EMIT(FCVT_scalar(1, 0b01, 0b11, 0b001, Dn, Xd))
+#define FCVTZUxwD(Xd, Dn)           EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b001, Dn, Xd))
 
 #define FCVT_vector_scalar(U, o2, sz, o1, Rn, Rd)   (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away
diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c
index 7c23baed..b1e64b39 100755
--- a/src/dynarec/dynarec_arm64_660f.c
+++ b/src/dynarec/dynarec_arm64_660f.c
@@ -203,6 +203,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;

             GETEX(v1, 0);

             GETGX_empty(v0);

+            #ifdef PRECISE_CVT

             LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr));

             UBFXx(x1, x1, 13, 2);   // extract round requested

             LSLx_REG(x1, x1, 3);

@@ -218,6 +219,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             VFCVTPSQS(v0, v1);  // 2: Toward +inf

             B_NEXT_nocond;

             VFCVTZSQS(v0, v1);  // 3: Toward 0

+            #else

+            VFCVTNSQS(v0, v1);

+            #endif

             break;

 

         case 0x60:

diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c
index c153f907..e8200877 100755
--- a/src/dynarec/dynarec_arm64_f20f.c
+++ b/src/dynarec/dynarec_arm64_f20f.c
@@ -48,6 +48,10 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int d0, d1;

     int fixedaddress;

 

+#ifdef PRECISE_CVT

+    int j32;

+    MAYUSE(j32);

+#endif

     MAYUSE(d0);

     MAYUSE(d1);

     MAYUSE(q0);

@@ -107,6 +111,31 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             GETEX(q0, 0);

             FCVTZSxwD(gd, q0);

             break;

+        case 0x2D:

+            INST_NAME("CVTSD2SI Gd, Ex");

+            nextop = F8;

+            GETGD;

+            GETEX(q0, 0);

+            #ifdef PRECISE_CVT

+            LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr));

+            UBFXx(x1, x1, 13, 2);   // extract round requested

+            LSLx_REG(x1, x1, 3);

+            ADDx_U12(x1, x1, 8);    // add the actual add+jump opcodes

+            // Construct a "switch case", with each case 2 instructions, so 8 bytes

+            BL(+4); // Branch with Link to next, so LR gets next PC address

+            ADDx_REG(xLR, xLR, x1);

+            B(xLR); // could use RET, but it's not really one

+            FCVTNSxwD(gd, q0);  // 0: Nearest (even)

+            B_NEXT_nocond;

+            FCVTMSxwD(gd, q0);  // 1: Toward -inf

+            B_NEXT_nocond;

+            FCVTPSxwD(gd, q0);  // 2: Toward +inf

+            B_NEXT_nocond;

+            FCVTZSxwD(gd, q0);  // 3: Toward 0

+            #else

+            FCVTNSxwD(gd, q0);

+            #endif

+            break;

 

 

         case 0x51:

diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index f61f9d23..53f8da67 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -1,6 +1,9 @@
 #ifndef __DYNAREC_ARM64_HELPER_H__
 #define __DYNAREC_ARM64_HELPER_H__
 
+// Uncomment to get closer-to-SSE float->int conversions (rounding mode taken from MXCSR instead of always round-to-nearest)
+//#define PRECISE_CVT
+
 #if STEP == 0
 #include "dynarec_arm64_pass0.h"
 #elif STEP == 1