about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2023-10-27 17:38:16 +0200
committer: ptitSeb <sebastien.chev@gmail.com> 2023-10-27 17:38:16 +0200
commit: 15860f324532247345e1f314eb8ebbfe37c5d531 (patch)
tree: 507acb224ac3cd61bfb30a1f51707b6347a8562d /src
parent: 8a1e4cdf306ff3f57a8603004e068549248db29b (diff)
download: box64-15860f324532247345e1f314eb8ebbfe37c5d531.tar.gz
          box64-15860f324532247345e1f314eb8ebbfe37c5d531.zip
[ARM64_DYNAREC] Fixed and improved i64 x87 optimisation
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_dd.c        | 44
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.c |  7
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_functions.h |  3
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h    |  8
4 files changed, 38 insertions, 24 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index c56258d1..3c6b6965 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -166,27 +166,31 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 1:
                     INST_NAME("FISTTP i64, ST0");
-                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                    v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                    s0 = fpu_get_scratch(dyn);
-                    #if 0
-                    // those are ARM 8.5 opcode!
-                    FRINT64ZD(s0, v1);
-                    FCVTZSxD(x2, s0);
-                    STRx_U12(x2, ed, fixedaddress);
-                    #else
-                    MRS_fpsr(x5);
-                    BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
-                    MSR_fpsr(x5);
-                    FRINTRRD(s0, v1, 3);
-                    FCVTZSxD(x2, s0);
-                    STx(x2, ed, fixedaddress);
-                    MRS_fpsr(x5);   // get back FPSR to check the IOC bit
-                    TBZ_MARK3(x5, FPSR_IOC);
-                    ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
-                    STx(x5, ed, fixedaddress);
-                    MARK3;
-                    #endif
+                    if(ST_IS_I64(0)) {
+                        VST64(v1, ed, fixedaddress);
+                    } else {
+                        s0 = fpu_get_scratch(dyn);
+                        #if 0
+                        // those are ARM 8.5 opcode!
+                        FRINT64ZD(s0, v1);
+                        FCVTZSxD(x2, s0);
+                        STRx_U12(x2, ed, fixedaddress);
+                        #else
+                        MRS_fpsr(x5);
+                        BFCw(x5, FPSR_IOC, 1);   // reset IOC bit
+                        MSR_fpsr(x5);
+                        FRINTRRD(s0, v1, 3);
+                        FCVTZSxD(x2, s0);
+                        STx(x2, ed, fixedaddress);
+                        MRS_fpsr(x5);   // get back FPSR to check the IOC bit
+                        TBZ_MARK3(x5, FPSR_IOC);
+                        ORRx_mask(x5, xZR, 1, 1, 0);    //0x8000000000000000
+                        STx(x5, ed, fixedaddress);
+                        MARK3;
+                        #endif
+                    }
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
                 case 2:
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index f95fd7a9..28e1fc5b 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -95,6 +95,13 @@ void fpu_reset_reg(dynarec_arm_t* dyn)
 
 }
 
+int neoncache_no_i64(int a)
+{
+    if(a==NEON_CACHE_ST_I64)
+        return NEON_CACHE_ST_D;
+    return a;
+}
+
 int neoncache_get_st(dynarec_arm_t* dyn, int ninst, int a)
 {
     if (dyn->insts[ninst].n.swapped) {
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index 77982715..0111c8a2 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -37,6 +37,9 @@ int neoncache_get_current_st_f_i64(dynarec_arm_t* dyn, int a);
 void neoncache_promote_double(dynarec_arm_t* dyn, int ninst, int a);
 // Combine and propagate if needed (pass 1 only)
 int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b);  // with stack current dyn->n_stack*
+// Do not allow i64 type
+int neoncache_no_i64(int a);
+
 
 // FPU Cache transformation (for loops) // Specific, need to be written by backend
 int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index b3f02ee5..85480f27 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1203,8 +1203,8 @@ int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
 #define ST_IS_F(A) (neoncache_get_current_st(dyn, ninst, A)==NEON_CACHE_ST_F)
 #define ST_IS_I64(A) (neoncache_get_current_st(dyn, ninst, A)==NEON_CACHE_ST_I64)
 #define X87_COMBINE(A, B) neoncache_combine_st(dyn, ninst, A, B)
-#define X87_ST0     neoncache_get_current_st(dyn, ninst, 0)
-#define X87_ST(A)   neoncache_get_current_st(dyn, ninst, A)
+#define X87_ST0     neoncache_no_i64(neoncache_get_current_st(dyn, ninst, 0))
+#define X87_ST(A)   neoncache_no_i64(neoncache_get_current_st(dyn, ninst, A))
 #else
 #define ST_IS_F(A) (neoncache_get_st(dyn, ninst, A)==NEON_CACHE_ST_F)
 #define ST_IS_I64(A) (neoncache_get_st(dyn, ninst, A)==NEON_CACHE_ST_I64)
@@ -1213,8 +1213,8 @@ int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
 #else
 #define X87_COMBINE(A, B) neoncache_get_st(dyn, ninst, A)
 #endif
-#define X87_ST0     neoncache_get_st(dyn, ninst, 0)
-#define X87_ST(A)   neoncache_get_st(dyn, ninst, A)
+#define X87_ST0     neoncache_no_i64(neoncache_get_st(dyn, ninst, 0))
+#define X87_ST(A)   neoncache_no_i64(neoncache_get_st(dyn, ninst, A))
 #endif
 
 //MMX helpers