about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-03-16 14:54:33 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2021-03-16 14:54:33 +0100
commit 70f50037845e2d368e5f47197bed28bcecf8dc85 (patch)
tree 832959da9b660e5ada4062dcefc894b03dadc5a0 /src
parent c2a0d7bc32ee6068c8b99e3be67c112678c4d517 (diff)
download box64-70f50037845e2d368e5f47197bed28bcecf8dc85.tar.gz
box64-70f50037845e2d368e5f47197bed28bcecf8dc85.zip
[DYNAREC] Small optimisation on Native call
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/dynarec_arm64_00.c 5
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.c 29
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.h 31
-rwxr-xr-x src/dynarec/dynarec_arm64_private.h 10
4 files changed, 64 insertions, 11 deletions
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index a5d903af..90ce3874 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -193,10 +193,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn);
                     // calling a native function
                     x87_forget(dyn, ninst, x3, x4, 0);
+                    sse_purge07cache(dyn, ninst, x3);
                     TABLE64(xRIP, dyn->insts[ninst].natcall); // read the 0xCC already
-                    STORE_XEMU_REGS(xRIP);
+                    STORE_XEMU_MINIMUM(xRIP);
                     CALL_S(x64Int3, -1);
-                    LOAD_XEMU_REGS(xRIP);
+                    LOAD_XEMU_MINIMUM(xRIP);
                     TABLE64(x3, dyn->insts[ninst].natcall);
                     ADDx_U12(x3, x3, 2+8+8);
                     CMPSx_REG(xRIP, x3);
diff --git a/src/dynarec/dynarec_arm64_helper.c b/src/dynarec/dynarec_arm64_helper.c
index c0a084db..2ddb3e99 100755
--- a/src/dynarec/dynarec_arm64_helper.c
+++ b/src/dynarec/dynarec_arm64_helper.c
@@ -812,7 +812,7 @@ static void mmx_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
 static void sse_reset(dynarec_arm_t* dyn, int ninst)
 {
 #if STEP > 1
-    for (int i=0; i<8; ++i)
+    for (int i=0; i<16; ++i)
         dyn->ssecache[i] = -1;
 #endif
 }
@@ -841,14 +841,35 @@ int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a)
     return 0;
 #endif
 }
-// purge the SSE cache only(needs 3 scratch registers)
-static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
+// purge the SSE cache for XMM0..XMM7 (to use before function native call)
+void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1)
 {
 #if STEP > 1
     int old = -1;
     for (int i=0; i<8; ++i)
         if(dyn->ssecache[i]!=-1) {
             if (old==-1) {
+                MESSAGE(LOG_DUMP, "\tPurge XMM0..7 Cache ------\n");
+                ++old;
+            }
+            VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
+            fpu_free_reg_quad(dyn, dyn->ssecache[i]);
+            dyn->ssecache[i] = -1;
+        }
+    if(old!=-1) {
+        MESSAGE(LOG_DUMP, "\t------ Purge XMM0..7 Cache\n");
+    }
+#endif
+}
+
+// purge the SSE cache only
+static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
+{
+#if STEP > 1
+    int old = -1;
+    for (int i=0; i<16; ++i)
+        if(dyn->ssecache[i]!=-1) {
+            if (old==-1) {
                 MESSAGE(LOG_DUMP, "\tPurge SSE Cache ------\n");
                 ++old;
             }
@@ -865,7 +886,7 @@ static void sse_purgecache(dynarec_arm_t* dyn, int ninst, int s1)
 static void sse_reflectcache(dynarec_arm_t* dyn, int ninst, int s1)
 {
 #if STEP > 1
-    for (int i=0; i<8; ++i)
+    for (int i=0; i<16; ++i)
         if(dyn->ssecache[i]!=-1) {
             VSTR128_U12(dyn->ssecache[i], xEmu, offsetof(x64emu_t, xmm[i]));
         }
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index c7f645b0..68021614 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -347,6 +347,34 @@
     LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \
     if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));}
 
+#define STORE_XEMU_MINIMUM(A)  \
+    STORE_REG(RAX);         \
+    STORE_REG(RCX);         \
+    STORE_REG(RDX);         \
+    STORE_REG(RBX);         \
+    STORE_REG(RSP);         \
+    STORE_REG(RBP);         \
+    STORE_REG(RSI);         \
+    STORE_REG(RDI);         \
+    STORE_REG(R8);          \
+    STORE_REG(R9);          \
+    STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \
+    if(A) {STRx_U12(A, xEmu, offsetof(x64emu_t, ip));}
+
+#define LOAD_XEMU_MINIMUM(A)  \
+    LOAD_REG(RAX);         \
+    LOAD_REG(RCX);         \
+    LOAD_REG(RDX);         \
+    LOAD_REG(RBX);         \
+    LOAD_REG(RSP);         \
+    LOAD_REG(RBP);         \
+    LOAD_REG(RSI);         \
+    LOAD_REG(RDI);         \
+    LOAD_REG(R8);          \
+    LOAD_REG(R9);          \
+    LDRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags)); \
+    if(A) {LDRx_U12(A, xEmu, offsetof(x64emu_t, ip));}
+
 #define SET_DFNONE(S)    if(!dyn->dfnone) {MOVZw(S, d_none); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=1;}
 #define SET_DF(S, N)     if(N) {MOVZw(S, N); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->dfnone=0;} else SET_DFNONE(S)
 #define SET_NODF()          dyn->dfnone = 0
@@ -535,6 +563,7 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty)
 #define sse_get_reg     STEPNAME(sse_get_reg)
 #define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
+#define sse_purge07cache STEPNAME(sse_purge07cache)
 
 #define fpu_pushcache   STEPNAME(fpu_pushcache)
 #define fpu_popcache    STEPNAME(fpu_popcache)
@@ -669,6 +698,8 @@ int mmx_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
 int sse_get_reg(dynarec_arm_t* dyn, int ninst, int s1, int a);
 // get neon register for a SSE reg, but don't try to synch it if it needed to be created
 int sse_get_reg_empty(dynarec_arm_t* dyn, int ninst, int s1, int a);
+// purge the XMM0..XMM7 cache (before function call)
+void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1);
 
 // common coproc helpers
 // reset the cache
diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h
index c375dd10..cb81b101 100755
--- a/src/dynarec/dynarec_arm64_private.h
+++ b/src/dynarec/dynarec_arm64_private.h
@@ -30,11 +30,11 @@ typedef struct dynarec_arm_s {
     uintptr_t           arm_start;  // start of the arm code
     int                 arm_size;   // size of emitted arm code
     int                 state_flags;// actual state for on-demand flags
-    int                 x87cache[8];// cache status for the 8 x87 register behind the fpu stack
-    int                 x87reg[8];  // reg used for x87cache entry
-    int                 mmxcache[8];// cache status for the 8 MMX registers
-    int                 ssecache[8];// cache status for the 8 SSE(2) registers
-    int                 fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx
+    int8_t              x87cache[8];// cache status for the 8 x87 register behind the fpu stack
+    int8_t              x87reg[8];  // reg used for x87cache entry
+    int8_t              mmxcache[8];// cache status for the 8 MMX registers
+    int8_t              ssecache[16];// cache status for the 16 SSE(2) registers
+    int8_t              fpuused[24];// all 8..31 Q reg from fpu, used by x87, sse and mmx
     int                 x87stack;   // cache stack counter
     int                 fpu_scratch;// scratch counter
     int                 fpu_extra_qscratch; // some opcode need an extra quad scratch register