about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2022-11-13 17:08:16 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2022-11-13 17:08:16 +0100
commit 9f037fef60bef8309d491e094b7f90d35d8f8a3d (patch)
tree 20c963cdbd251e0d6998fbabcf4e8d6e61abb4b5
parent 0bc7100ea81d5097110e944d34a0162d074593ac (diff)
download box64-9f037fef60bef8309d491e094b7f90d35d8f8a3d.tar.gz
box64-9f037fef60bef8309d491e094b7f90d35d8f8a3d.zip
[DYNAREC] Optimisations and fixes on a few x87 opcodes (fixes Piczle Cross Adventure on Steam)
-rwxr-xr-x src/dynarec/arm64/arm64_emitter.h 5
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_d9.c 19
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_dd.c 4
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_private.h 8
4 files changed, 22 insertions, 14 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 404a531b..db4ef096 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1245,6 +1245,11 @@
 #define VFRINTISQ(Vd,Vn)            EMIT(FRINT_vector(1, 1, 1, 0, 1, Vn, Vd))
 #define VFRINTIDQ(Vd,Vn)            EMIT(FRINT_vector(1, 1, 1, 1, 1, Vn, Vd))
 
+#define FRINTI_scalar(type, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b001<<18 | 0b111<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
+#define FRINTIS(Sd, Sn)             EMIT(FRINTI_scalar(0b00, Sn, Sd))
+#define FRINTID(Dd, Dn)             EMIT(FRINTI_scalar(0b01, Dn, Dd))
+
+
 #define FRINTxx_scalar(type, op, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
 #define FRINT32ZS(Sd, Sn)           EMIT(FRINTxx_scalar(0b00, 0b00, Sn, Sd))
 #define FRINT32ZD(Dd, Dn)           EMIT(FRINTxx_scalar(0b01, 0b00, Dn, Dd))
diff --git a/src/dynarec/arm64/dynarec_arm64_d9.c b/src/dynarec/arm64/dynarec_arm64_d9.c
index 63b39b97..ce101f46 100644
--- a/src/dynarec/arm64/dynarec_arm64_d9.c
+++ b/src/dynarec/arm64/dynarec_arm64_d9.c
@@ -31,6 +31,7 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
     uint8_t nextop = F8;
     uint8_t ed;
     uint8_t wback, wb1;
+    uint8_t u8;
     int64_t fixedaddress;
     int v1, v2;
     int s0;
@@ -71,6 +72,8 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xCF:
             INST_NAME("FXCH STx");
             // swap the cache value, not the double value itself :p
+            x87_get_st(dyn, ninst, x1, x2, nextop&7, X87_ST(nextop&7));
+            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
             x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
             // should set C1 to 0
             break;
@@ -92,6 +95,8 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xDF:
             INST_NAME("FSTPNCE ST0, STx");
             // copy the cache value for st0 to stx
+            x87_get_st_empty(dyn, ninst, x1, x2, nextop&7, X87_ST(nextop&7));
+            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
             x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
             x87_do_pop(dyn, ninst, x3);
             break;
@@ -268,21 +273,17 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         case 0xFC:
             INST_NAME("FRNDINT");
+            #if 0
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             // use C helper for now, nothing staightforward is available
             x87_forget(dyn, ninst, x1, x2, 0);
             CALL(arm_frndint, -1);
-            /*
-            v1 = x87_get_st(dyn, ninst, x1, x2, 0);
-            VCMP_F64_0(v1);
-            VMRS_APSR();
-            B_NEXT(cVS);    // Unordered, skip
-            B_NEXT(cEQ);    // Zero, skip
+            #else
+            v1 = x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
             u8 = x87_setround(dyn, ninst, x1, x2, x3);
-            VCVT_S32_F64(x1, v1);   // limit to 32bits....
-            VCVT_F64_S32(v1, x1);
+            FRINTID(v1, v1);
             x87_restoreround(dyn, ninst, u8);
-            */
+            #endif
             break;
         case 0xFD:
             INST_NAME("FSCALE");
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index 9a5fc987..686a7dde 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -83,6 +83,8 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0xDF:
             INST_NAME("FSTP ST0, STx");
             // copy the cache value for st0 to stx
+            x87_get_st_empty(dyn, ninst, x1, x2, nextop&7, X87_ST(nextop&7));
+            x87_get_st(dyn, ninst, x1, x2, 0, X87_ST0);
             x87_swapreg(dyn, ninst, x1, x2, 0, nextop&7);
             x87_do_pop(dyn, ninst, x3);
             break;
@@ -162,7 +164,7 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 1:
                     INST_NAME("FISTTP i64, ST0");
-                    v1 = x87_do_push(dyn, ninst, x3, NEON_CACHE_ST_D);
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);
                     s0 = fpu_get_scratch(dyn);
                     #if 0
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 1cac7d3e..37d43372 100755
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -19,15 +19,15 @@ typedef struct instsize_s instsize_t;
 typedef union neon_cache_s {
     int8_t           v;
     struct {
-        unsigned int t:4;   // reg type
-        unsigned int n:4;   // reg number
+        uint8_t t:4;   // reg type
+        uint8_t n:4;   // reg number
     };
 } neon_cache_t;
 typedef union sse_cache_s {
     int8_t      v;
     struct {
-        unsigned int reg:7;
-        unsigned int write:1;
+        uint8_t reg:7;
+        uint8_t write:1;
     };
 } sse_cache_t;
 typedef struct neoncache_s {