diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-03-22 12:34:45 +0000 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-03-22 12:34:45 +0000 |
| commit | 79ec2ee8f7a923390c5933fc4c3a282e8b650493 (patch) | |
| tree | 641a05340b4c0363dcd52ad308593dc9136188b8 | |
| parent | 5c4f0de1d0de47f619c192d85c538b6f85d00843 (diff) | |
| download | box64-79ec2ee8f7a923390c5933fc4c3a282e8b650493.tar.gz box64-79ec2ee8f7a923390c5933fc4c3a282e8b650493.zip | |
[RV64_DYNAREC] Improved handling of Float/Double cache on function calls
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_db.c | 12 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.h | 2 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 117 |
5 files changed, 95 insertions, 44 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c index ec46d1e5..ca7ec07e 100644 --- a/src/dynarec/rv64/dynarec_rv64_00.c +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -931,8 +931,8 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni *need_epilog = 1; } else { MESSAGE(LOG_DUMP, "Native Call to %s\n", GetNativeName(GetNativeFnc(ip))); - //x87_forget(dyn, ninst, x3, x4, 0); - //sse_purge07cache(dyn, ninst, x3); + x87_forget(dyn, ninst, x3, x4, 0); + sse_purge07cache(dyn, ninst, x3); // disabling isSimpleWrapper because all signed value less than 64bits needs to be sign extended // and return value needs to be cleanned up tmp = 0;//isSimpleWrapper(*(wrapper_t*)(addr)); @@ -1089,7 +1089,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni PUSH1(x2); MESSAGE(LOG_DUMP, "Native Call to %s (retn=%d)\n", GetNativeName(GetNativeFnc(dyn->insts[ninst].natcall-1)), dyn->insts[ninst].retn); // calling a native function - //sse_purge07cache(dyn, ninst, x3); // TODO: chack the fpxx to purge/save when implemented + sse_purge07cache(dyn, ninst, x3); if((box64_log<2 && !cycle_log) && dyn->insts[ninst].natcall) { // disabling isSimpleWrapper because all signed value less than 64bits needs to be sign extended // and return value needs to be cleanned up diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c index b31ed9ad..1bc42f1e 100644 --- a/src/dynarec/rv64/dynarec_rv64_db.c +++ b/src/dynarec/rv64/dynarec_rv64_db.c @@ -195,11 +195,11 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("FLD tbyte"); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); if((PK(0)==0xDB && ((PK(1)>>3)&7)==7) || (PK(0)>=0x40 && PK(0)<=0x4f && PK(1)==0xDB && ((PK(2)>>3)&7)==7)) { // the FLD is immediatly 
followed by an FSTP - LD(x5, ed, 0); - LH(x6, ed, 8); + LD(x5, ed, fixedaddress+0); + LH(x6, ed, fixedaddress+8); // no persistant scratch register, so unrool both instruction here... MESSAGE(LOG_DUMP, "\tHack: FSTP tbyte\n"); nextop = F8; // 0xDB or rex @@ -209,9 +209,9 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni } else rex.rex = 0; nextop = F8; //modrm - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0); - SD(x5, ed, 0); - SH(x6, ed, 8); + addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 8, 0); + SD(x5, ed, fixedaddress+0); + SH(x6, ed, fixedaddress+8); } else { if(box64_x87_no80bits) { v1 = x87_do_push(dyn, ninst, x1, EXT_CACHE_ST_D); diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index 8994e0e5..2bdda0a0 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -33,8 +33,6 @@ #define X870 XMM0+16 #define EMM0 XMM0+16 -#define SCRATCH0 0 - // Get a FPU scratch reg int fpu_get_scratch(dynarec_rv64_t* dyn) { diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index 63640b0b..5de69519 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -6,6 +6,8 @@ typedef struct x64emu_s x64emu_t; typedef struct dynarec_rv64_s dynarec_rv64_t; +#define SCRATCH0 2 + // Get an FPU scratch reg int fpu_get_scratch(dynarec_rv64_t* dyn); // Reset scratch regs counter diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index b049a93b..f402cf45 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -39,6 +39,9 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t ret = x2; *fixaddress = 0; if(hint>0) ret = hint; + int maxval = 2047; + if(i12>1) + maxval -= i12; 
MAYUSE(scratch); if(!(nextop&0xC0)) { if((nextop&7)==4) { @@ -47,7 +50,7 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, if((sib&0x7)==5) { int64_t tmp = F32S; if (sib_reg!=4) { - if(tmp && ((tmp<-2048) || (tmp>2047) || !i12)) { + if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) { MOV64x(scratch, tmp); if((sib>>6)) { SLLI(ret, xRAX+sib_reg, (sib>>6)); @@ -84,16 +87,16 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, } else if((nextop&7)==5) { int64_t tmp = F32S64; int64_t adj = dyn->last_ip?((addr+delta)-dyn->last_ip):0; - if(i12 && adj && (tmp+adj>=-2048) && (tmp+adj<=2047)) { + if(i12 && adj && (tmp+adj>=-2048) && (tmp+adj<=maxval)) { ret = xRIP; *fixaddress = tmp+adj; - } else if(i12 && (tmp>=-2048) && (tmp<=2047)) { + } else if(i12 && (tmp>=-2048) && (tmp<=maxval)) { GETIP(addr+delta); ret = xRIP; *fixaddress = tmp; - } else if(adj && (tmp+adj>=-2048) && (tmp+adj<=2047)) { + } else if(adj && (tmp+adj>=-2048) && (tmp+adj<=maxval)) { ADDI(ret, xRIP, tmp+adj); - } else if((tmp>=-2048) && (tmp<=2047)) { + } else if((tmp>=-2048) && (tmp<=maxval)) { GETIP(addr+delta); ADDI(ret, xRIP, tmp); } else if(tmp+addr+delta<0x100000000LL) { @@ -716,8 +719,8 @@ void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in if(next) { // need to check if a ST_F need local promotion if(extcache_get_st_f(dyn, ninst, dyn->e.x87cache[i])>=0) { - FCVTDS(0, dyn->e.x87reg[i]); - FSD(0, s1, offsetof(x64emu_t, x87)); // save the value + FCVTDS(SCRATCH0, dyn->e.x87reg[i]); + FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); // save the value } else { FSD(dyn->e.x87reg[i], s1, offsetof(x64emu_t, x87)); // save the value } @@ -849,8 +852,8 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) } ADD(s1, xEmu, s2); if(dyn->e.extcache[dyn->e.x87reg[ret]].t==EXT_CACHE_ST_F) { - FCVTDS(0, dyn->e.x87reg[ret]); - FSD(31, s1, offsetof(x64emu_t, x87)); + FCVTDS(SCRATCH0, dyn->e.x87reg[ret]); + 
FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); } else { FSD(dyn->e.x87reg[ret], s1, offsetof(x64emu_t, x87)); } @@ -1185,43 +1188,91 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) #endif void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) { + // need to save 0..1 && 10..17 (maybe) && 28..31 + // so 0..7 (SSE) && 17..23 (x87+MMX) int start = not07?8:0; // only SSE regs needs to be push back to xEmu (needs to be "write") int n=0; - for (int i=start; i<16; i++) + for (int i=start; i<8; i++) if(dyn->e.ssecache[i].v!=-1) ++n; - if(!n) - return; - MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); - for (int i=start; i<16; ++i) - if(dyn->e.ssecache[i].v!=-1) { - if(dyn->e.ssecache[i].single) - FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); - else - FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); - } - MESSAGE(LOG_DUMP, "\t------- Push XMM Cache (%d)\n", n); + if(n) { + MESSAGE(LOG_DUMP, "\tPush XMM Cache (%d)------\n", n); + for (int i=start; i<8; ++i) + if(dyn->e.ssecache[i].v!=-1) { + if(dyn->e.ssecache[i].single) + FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); + else + FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); + } + MESSAGE(LOG_DUMP, "\t------- Push XMM Cache (%d)\n", n); + } + n = 0; + for(int i=17; i<24; ++i) + if(dyn->e.extcache[i].v!=0) + ++n; + if(n) { + MESSAGE(LOG_DUMP, "\tPush x87/MMX Cache (%d)------\n", n); + ADDI(xSP, xSP, -8*((n+1)&~1)); + int p = 0; + for(int i=17; i<24; ++i) + if(dyn->e.extcache[i].v!=0) { + switch(dyn->e.extcache[i].t) { + case EXT_CACHE_ST_F: + case EXT_CACHE_SS: + FSW(EXTREG(i), xSP, p*8); + break; + default: + FSD(EXTREG(i), xSP, p*8); + break; + }; + ++p; + } + MESSAGE(LOG_DUMP, "\t------- Push x87/MMX Cache (%d)\n", n); + } } void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) { int start = not07?8:0; // only SSE regs needs to be pop back from xEmu (don't need to be "write" this time) int n=0; - for (int 
i=start; i<16; i++) + for (int i=start; i<8; i++) if(dyn->e.ssecache[i].v!=-1) ++n; - if(!n) - return; - MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); - for (int i=start; i<16; ++i) - if(dyn->e.ssecache[i].v!=-1) { - if(dyn->e.ssecache[i].single) - FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); - else - FLD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); - } - MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n); + if(n) { + MESSAGE(LOG_DUMP, "\tPop XMM Cache (%d)------\n", n); + for (int i=start; i<8; ++i) + if(dyn->e.ssecache[i].v!=-1) { + if(dyn->e.ssecache[i].single) + FLW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); + else + FLD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); + } + MESSAGE(LOG_DUMP, "\t------- Pop XMM Cache (%d)\n", n); + } + n = 0; + for(int i=17; i<24; ++i) + if(dyn->e.extcache[i].v!=0) + ++n; + if(n) { + MESSAGE(LOG_DUMP, "\tPush x87/MMX Cache (%d)------\n", n); + int p = 0; + for(int i=17; i<24; ++i) + if(dyn->e.extcache[i].v!=0) { + switch(dyn->e.extcache[i].t) { + case EXT_CACHE_ST_F: + case EXT_CACHE_SS: + FLW(EXTREG(i), xSP, p*8); + break; + default: + FLD(EXTREG(i), xSP, p*8); + break; + }; + ++p; + } + ADDI(xSP, xSP, 8*((n+1)&~1)); + MESSAGE(LOG_DUMP, "\t------- Push x87/MMX Cache (%d)\n", n); + } } void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3) |