about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-03-22 12:34:45 +0000
committer ptitSeb <sebastien.chev@gmail.com> 2023-03-22 12:34:45 +0000
commit 79ec2ee8f7a923390c5933fc4c3a282e8b650493 (patch)
tree 641a05340b4c0363dcd52ad308593dc9136188b8
parent 5c4f0de1d0de47f619c192d85c538b6f85d00843 (diff)
download box64-79ec2ee8f7a923390c5933fc4c3a282e8b650493.tar.gz
box64-79ec2ee8f7a923390c5933fc4c3a282e8b650493.zip
[RV64_DYNAREC] Improved handling of Float/Double cache on functions calls
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_db.c 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 117
5 files changed, 95 insertions, 44 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index ec46d1e5..ca7ec07e 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -931,8 +931,8 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     *need_epilog = 1;
                 } else {
                     MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip)));
-                    //x87_forget(dyn, ninst, x3, x4, 0);
-                    //sse_purge07cache(dyn, ninst, x3);
+                    x87_forget(dyn, ninst, x3, x4, 0);
+                    sse_purge07cache(dyn, ninst, x3);
                     // disabling isSimpleWrapper because all signed value less than 64bits needs to be sign extended
                     // and return value needs to be cleanned up
                     tmp = 0;//isSimpleWrapper(*(wrapper_t*)(addr));
@@ -1089,7 +1089,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     PUSH1(x2);
                     MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn);
                     // calling a native function
-                    //sse_purge07cache(dyn, ninst, x3);     // TODO: chack the fpxx to purge/save when implemented
+                    sse_purge07cache(dyn, ninst, x3);
                     if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) {
                         // disabling isSimpleWrapper because all signed value less than 64bits needs to be sign extended
                         // and return value needs to be cleanned up
diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c
index b31ed9ad..1bc42f1e 100644
--- a/src/dynarec/rv64/dynarec_rv64_db.c
+++ b/src/dynarec/rv64/dynarec_rv64_db.c
@@ -195,11 +195,11 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
                 case 5:
                     INST_NAME("FLD tbyte");
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
                     if((PK(0)==0xDB && ((PK(1)>>3)&7)==7) || (PK(0)>=0x40 && PK(0)<=0x4f && PK(1)==0xDB && ((PK(2)>>3)&7)==7)) {
                         // the FLD is immediatly followed by an FSTP
-                        LD(x5, ed, 0);
-                        LH(x6, ed, 8);
+                        LD(x5, ed, fixedaddress+0);
+                        LH(x6, ed, fixedaddress+8);
                         // no persistant scratch register, so unrool both instruction here...
                         MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n");
                         nextop = F8;    // 0xDB or rex
@@ -209,9 +209,9 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         } else
                             rex.rex = 0;
                         nextop = F8;    //modrm
-                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                        SD(x5, ed, 0);
-                        SH(x6, ed, 8);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0);
+                        SD(x5, ed, fixedaddress+0);
+                        SH(x6, ed, fixedaddress+8);
                     } else {
                         if(box64_x87_no80bits) {
                             v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D);
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 8994e0e5..2bdda0a0 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -33,8 +33,6 @@
 #define X870    XMM0+16
 #define EMM0    XMM0+16
 
-#define SCRATCH0    0
-
 // Get a FPU scratch reg
 int fpu_get_scratch(dynarec_rv64_t* dyn)
 {
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
index 63640b0b..5de69519 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.h
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -6,6 +6,8 @@
 typedef struct x64emu_s x64emu_t;
 typedef struct dynarec_rv64_s dynarec_rv64_t;
 
+#define SCRATCH0    2
+
 // Get an FPU scratch reg
 int fpu_get_scratch(dynarec_rv64_t* dyn);
 // Reset scratch regs counter
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index b049a93b..f402cf45 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -39,6 +39,9 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
     uint8_t ret = x2;
     *fixaddress = 0;
     if(hint>0) ret = hint;
+    int maxval = 2047;
+    if(i12>1)
+        maxval -= i12;
     MAYUSE(scratch);
     if(!(nextop&0xC0)) {
         if((nextop&7)==4) {
@@ -47,7 +50,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
             if((sib&0x7)==5) {
                 int64_t tmp = F32S;
                 if (sib_reg!=4) {
-                    if(tmp && ((tmp<-2048) || (tmp>2047) || !i12)) {
+                    if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) {
                         MOV64x(scratch, tmp);
                         if((sib>>6)) {
                             SLLI(ret, xRAX+sib_reg, (sib>>6));
@@ -84,16 +87,16 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
         } else if((nextop&7)==5) {
             int64_t tmp = F32S64;
             int64_t adj = dyn->last_ip?((addr+delta)-dyn->last_ip):0;
-            if(i12 && adj && (tmp+adj>=-2048) && (tmp+adj<=2047)) {
+            if(i12 && adj && (tmp+adj>=-2048) && (tmp+adj<=maxval)) {
                 ret = xRIP;
                 *fixaddress = tmp+adj;
-            } else if(i12 && (tmp>=-2048) && (tmp<=2047)) {
+            } else if(i12 && (tmp>=-2048) && (tmp<=maxval)) {
                 GETIP(addr+delta);
                 ret = xRIP;
                 *fixaddress = tmp;
-            } else if(adj && (tmp+adj>=-2048) && (tmp+adj<=2047)) {
+            } else if(adj && (tmp+adj>=-2048) && (tmp+adj<=maxval)) {
                 ADDI(ret, xRIP, tmp+adj);
-            } else if((tmp>=-2048) && (tmp<=2047)) {
+            } else if((tmp>=-2048) && (tmp<=maxval)) {
                 GETIP(addr+delta);
                 ADDI(ret, xRIP, tmp);
             } else if(tmp+addr+delta<0x100000000LL) {
@@ -716,8 +719,8 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in
                 if(next) {
                     // need to check if a ST_F need local promotion
                     if(extcache_get_st_f(dyn, ninst, dyn->e.x87cache[i])>=0) {
-                        FCVTDS(0, dyn->e.x87reg[i]);
-                        FSD(0, s1, offsetof(x64emu_t, x87));    // save the value
+                        FCVTDS(SCRATCH0, dyn->e.x87reg[i]);
+                        FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));    // save the value
                     } else {
                         FSD(dyn->e.x87reg[i], s1, offsetof(x64emu_t, x87));    // save the value
                     }
@@ -849,8 +852,8 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st)
     }
     ADD(s1, xEmu, s2);
     if(dyn->e.extcache[dyn->e.x87reg[ret]].t==EXT_CACHE_ST_F) {
-        FCVTDS(0, dyn->e.x87reg[ret]);
-        FSD(31, s1, offsetof(x64emu_t, x87));
+        FCVTDS(SCRATCH0, dyn->e.x87reg[ret]);
+        FSD(SCRATCH0, s1, offsetof(x64emu_t, x87));
     } else {
         FSD(dyn->e.x87reg[ret], s1, offsetof(x64emu_t, x87));
     }
@@ -1185,43 +1188,91 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1)
 #endif
 void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
 {
+    // need to save 0..1 && 10..17 (maybe) && 28..31
+    // so 0..7 (SSE) && 17..23 (x87+MMX)
     int start = not07?8:0;
     // only SSE regs needs to be push back to xEmu (needs to be "write")
     int n=0;
-    for (int i=start; i<16; i++)
+    for (int i=start; i<8; i++)   // count ssecache slots in [start,8) whose v is not -1; the range is empty when not07 is set (start==8)
         if(dyn->e.ssecache[i].v!=-1)
             ++n;
-    if(!n)
-        return;
-    MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n);
-    for (int i=start; i<16; ++i)
-        if(dyn->e.ssecache[i].v!=-1) {
-            if(dyn->e.ssecache[i].single)
-                FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
-            else
-                FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
-        }
-    MESSAGE(LOG_DUMP, "\t------- Push XMM Cache (%d)\n", n);
+    if(n) {   // no early return any more: the x87/MMX part below must run even when no XMM slot was counted
+        MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n);
+        for (int i=start; i<8; ++i)
+            if(dyn->e.ssecache[i].v!=-1) {
+                if(dyn->e.ssecache[i].single)
+                    FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));   // slot flagged single: FSW to xmm[i] inside x64emu_t
+                else
+                    FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));   // otherwise: FSD to the same xmm[i] offset
+            }
+        MESSAGE(LOG_DUMP, "\t------- Push XMM Cache (%d)\n", n);
+    }
+    n = 0;   // n is reused: now counts extcache slots 17..23 whose v is non-zero
+    for(int i=17; i<24; ++i)
+        if(dyn->e.extcache[i].v!=0)
+            ++n;
+    if(n) {
+        MESSAGE(LOG_DUMP, "\tPush x87/MMX Cache (%d)------\n", n);
+        ADDI(xSP, xSP, -8*((n+1)&~1));   // lower xSP by 8 bytes per counted slot, count rounded up to even (so always a multiple of 16 bytes)
+        int p = 0;   // index of the next 8-byte slot at xSP
+        for(int i=17; i<24; ++i)
+            if(dyn->e.extcache[i].v!=0) {
+                switch(dyn->e.extcache[i].t) {
+                    case EXT_CACHE_ST_F: 
+                    case EXT_CACHE_SS: 
+                        FSW(EXTREG(i), xSP, p*8);   // ST_F and SS typed slots go through FSW; each entry still advances by 8 bytes
+                        break;
+                    default:
+                        FSD(EXTREG(i), xSP, p*8);   // every other cache type goes through FSD
+                        break;
+                };
+                ++p;   // slots are assigned in increasing i order; fpu_popcache walks them in the same order
+            }
+        MESSAGE(LOG_DUMP, "\t------- Push x87/MMX Cache (%d)\n", n);
+    }
 }
 void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
 {
     int start = not07?8:0;
     // only SSE regs needs to be pop back from xEmu (don't need to be "write" this time)
     int n=0;
-    for (int i=start; i<16; i++)
+    for (int i=start; i<8; i++)   // count ssecache slots in [start,8) whose v is not -1; the range is empty when not07 is set (start==8)
         if(dyn->e.ssecache[i].v!=-1)
             ++n;
-    if(!n)
-        return;
-    MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n);
-    for (int i=start; i<16; ++i)
-        if(dyn->e.ssecache[i].v!=-1) {
-            if(dyn->e.ssecache[i].single)
-                FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
-            else
-                FLD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));
-        }
-    MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n);
+    if(n) {   // no early return any more: the x87/MMX part below must run even when no XMM slot was counted
+        MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n);
+        for (int i=start; i<8; ++i)
+            if(dyn->e.ssecache[i].v!=-1) {
+                if(dyn->e.ssecache[i].single)
+                    FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));   // slot flagged single: FLW from xmm[i] inside x64emu_t
+                else
+                    FLD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i]));   // otherwise: FLD from the same xmm[i] offset
+            }
+        MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n);
+    }
+    n = 0;   // n is reused: now counts extcache slots 17..23 whose v is non-zero (same test as fpu_pushcache)
+    for(int i=17; i<24; ++i)
+        if(dyn->e.extcache[i].v!=0)
+            ++n;
+    if(n) {
+        MESSAGE(LOG_DUMP, "\tPop x87/MMX Cache (%d)------\n", n);
+        int p = 0;   // index of the next 8-byte slot at xSP, walked in the same order fpu_pushcache filled them
+        for(int i=17; i<24; ++i)
+            if(dyn->e.extcache[i].v!=0) {
+                switch(dyn->e.extcache[i].t) {
+                    case EXT_CACHE_ST_F: 
+                    case EXT_CACHE_SS: 
+                        FLW(EXTREG(i), xSP, p*8);   // ST_F and SS typed slots were stored with FSW, so reload with FLW
+                        break;
+                    default:
+                        FLD(EXTREG(i), xSP, p*8);   // every other cache type was stored with FSD, so reload with FLD
+                        break;
+                };
+                ++p;
+            }
+        ADDI(xSP, xSP, 8*((n+1)&~1));   // raise xSP by the same even-rounded amount fpu_pushcache subtracted, after all loads are done
+        MESSAGE(LOG_DUMP, "\t------- Pop x87/MMX Cache (%d)\n", n);
+    }
 }
 
 void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3)