diff options
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_df.c | 111 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 109 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.h | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 103 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 15 |
6 files changed, 244 insertions, 107 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c index a2f66677..704e1302 100644 --- a/src/dynarec/rv64/dynarec_rv64_df.c +++ b/src/dynarec/rv64/dynarec_rv64_df.c @@ -172,25 +172,30 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 5: INST_NAME("FILD ST0, i64"); - X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_D); + X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_I64); addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - LD(x1, wback, fixedaddress); - if (rex.is32bits) { - // need to also feed the STll stuff... - ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); - LWU(x5, xEmu, offsetof(x64emu_t, top)); - int a = 0 - dyn->e.x87stack; - if(a) { - ADDIW(x5, x5, a); - ANDI(x5, x5, 0x7); + + if (ST_IS_I64(0)) { + FLD(v1, wback, fixedaddress); + } else { + LD(x1, wback, fixedaddress); + if (rex.is32bits) { + // need to also feed the STll stuff... + ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); + LWU(x5, xEmu, offsetof(x64emu_t, top)); + int a = 0 - dyn->e.x87stack; + if (a) { + ADDIW(x5, x5, a); + ANDI(x5, x5, 0x7); + } + SLLI(x5, x5, 4); // fpu_ll is 2 i64 + ADD(x5, x5, x4); + SD(x1, x5, 8); // ll + } + FCVTDL(v1, x1, RD_RTZ); + if (rex.is32bits) { + FSD(v1, x5, 0); // ref } - SLLI(x5, x5, 4); // fpu_ll is 2 i64 - ADD(x5, x5, x4); - SD(x1, x5, 8); // ll - } - FCVTDL(v1, x1, RD_RTZ); - if(rex.is32bits) { - FSD(v1, x5, 0); // ref } break; case 6: @@ -203,44 +208,50 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni break; case 7: INST_NAME("FISTP i64, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_D); - u8 = x87_setround(dyn, ninst, x1, x2); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, EXT_CACHE_ST_I64); + if (!ST_IS_I64(0)) { + u8 = x87_setround(dyn, ninst, x1, x2); + } addr = geted(dyn, addr, ninst, nextop, &wback, x2, x3, &fixedaddress, rex, NULL, 1, 0); - if(rex.is32bits) { - // need to check STll first... 
- ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); - LWU(x5, xEmu, offsetof(x64emu_t, top)); - int a = 0 - dyn->e.x87stack; - if(a) { - ADDIW(x5, x5, a); - ANDI(x5, x5, 0x7); + if (ST_IS_I64(0)) { + FSD(v1, wback, fixedaddress); + } else { + if (rex.is32bits) { + // need to check STll first... + ADDI(x4, xEmu, offsetof(x64emu_t, fpu_ll)); + LWU(x5, xEmu, offsetof(x64emu_t, top)); + int a = 0 - dyn->e.x87stack; + if (a) { + ADDIW(x5, x5, a); + ANDI(x5, x5, 0x7); + } + SLLI(x5, x5, 4); // fpu_ll is 2 i64 + ADD(x5, x5, x4); + FMVXD(x3, v1); + LD(x6, x5, 0); // ref + BNE_MARK(x6, x3); + LD(x6, x5, 8); // ll + SD(x6, wback, fixedaddress); + B_MARK3_nocond; + MARK; } - SLLI(x5, x5, 4); // fpu_ll is 2 i64 - ADD(x5, x5, x4); - FMVXD(x3, v1); - LD(x6, x5, 0); // ref - BNE_MARK(x6, x3); - LD(x6, x5, 8); // ll - SD(x6, wback, fixedaddress); - B_MARK3_nocond; - MARK; - } - if(!box64_dynarec_fastround) { - FSFLAGSI(0); // reset all bits - } - FCVTLD(x4, v1, RD_DYN); - if(!box64_dynarec_fastround) { - FRFLAGS(x5); // get back FPSR to check the IOC bit - ANDI(x5, x5, 1<<FR_NV); - BEQ_MARK2(x5, xZR); - MOV64x(x4, 0x8000000000000000LL); + if (!box64_dynarec_fastround) { + FSFLAGSI(0); // reset all bits + } + FCVTLD(x4, v1, RD_DYN); + if (!box64_dynarec_fastround) { + FRFLAGS(x5); // get back FPSR to check the IOC bit + ANDI(x5, x5, 1 << FR_NV); + BEQ_MARK2(x5, xZR); + MOV64x(x4, 0x8000000000000000LL); + } + MARK2; + SD(x4, wback, fixedaddress); + MARK3; + x87_restoreround(dyn, ninst, u8); } - MARK2; - SD(x4, wback, fixedaddress); - MARK3; - x87_restoreround(dyn, ninst, u8); X87_POP_OR_FAIL(dyn, ninst, x3); break; default: diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index f01c638b..b9995f7d 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -58,7 +58,7 @@ void fpu_free_reg(dynarec_rv64_t* dyn, int reg) int idx = EXTIDX(reg); // TODO: check upper limit? 
dyn->e.fpuused[idx] = 0; - if(dyn->e.extcache[idx].t!=EXT_CACHE_ST_F && dyn->e.extcache[idx].t!=EXT_CACHE_ST_D) + if (dyn->e.extcache[idx].t != EXT_CACHE_ST_F && dyn->e.extcache[idx].t != EXT_CACHE_ST_D && dyn->e.extcache[idx].t != EXT_CACHE_ST_I64) dyn->e.extcache[idx].v = 0; } // Get an MMX double reg @@ -90,18 +90,28 @@ void fpu_reset_reg(dynarec_rv64_t* dyn) } } +int extcache_no_i64(dynarec_rv64_t* dyn, int ninst, int st, int a) +{ + if (a == EXT_CACHE_ST_I64) { + extcache_promote_double(dyn, ninst, st); + return EXT_CACHE_ST_D; + } + return a; +} + int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a) { if (dyn->insts[ninst].e.swapped) { - if(dyn->insts[ninst].e.combined1 == a) + if (dyn->insts[ninst].e.combined1 == a) a = dyn->insts[ninst].e.combined2; - else if(dyn->insts[ninst].e.combined2 == a) + else if (dyn->insts[ninst].e.combined2 == a) a = dyn->insts[ninst].e.combined1; } - for(int i=0; i<24; ++i) - if((dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F - || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D) - && dyn->insts[ninst].e.extcache[i].n==a) + for (int i = 0; i < 24; ++i) + if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64) + && dyn->insts[ninst].e.extcache[i].n == a) return dyn->insts[ninst].e.extcache[i].t; // not in the cache yet, so will be fetched... 
return EXT_CACHE_ST_D; @@ -110,12 +120,13 @@ int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a) int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a) { (void)ninst; - if(!dyn->insts) + if (!dyn->insts) return EXT_CACHE_ST_D; - for(int i=0; i<24; ++i) - if((dyn->e.extcache[i].t==EXT_CACHE_ST_F - || dyn->e.extcache[i].t==EXT_CACHE_ST_D) - && dyn->e.extcache[i].n==a) + for (int i = 0; i < 24; ++i) + if ((dyn->e.extcache[i].t == EXT_CACHE_ST_F + || dyn->e.extcache[i].t == EXT_CACHE_ST_D + || dyn->e.extcache[i].t == EXT_CACHE_ST_I64) + && dyn->e.extcache[i].n == a) return dyn->e.extcache[i].t; // not in the cache yet, so will be fetched... return EXT_CACHE_ST_D; @@ -129,6 +140,16 @@ int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a) return i; return -1; } + +int extcache_get_st_f_i64(dynarec_rv64_t* dyn, int ninst, int a) +{ + for (int i = 0; i < 24; ++i) + if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F) + && dyn->insts[ninst].e.extcache[i].n == a) + return i; + return -1; +} + int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a) { for(int i=0; i<24; ++i) @@ -137,6 +158,16 @@ int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a) return i; return -1; } + +int extcache_get_st_f_i64_noback(dynarec_rv64_t* dyn, int ninst, int a) +{ + for (int i = 0; i < 24; ++i) + if ((dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F) + && dyn->insts[ninst].e.extcache[i].n == a) + return i; + return -1; +} + int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a) { for(int i=0; i<24; ++i) @@ -146,6 +177,15 @@ int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a) return -1; } +int extcache_get_current_st_f_i64(dynarec_rv64_t* dyn, int a) +{ + for (int i = 0; i < 24; ++i) + if ((dyn->e.extcache[i].t == EXT_CACHE_ST_I64 || dyn->e.extcache[i].t == EXT_CACHE_ST_F) + && dyn->e.extcache[i].n == a) + return 
i; + return -1; +} + static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a); static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a); static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int maxinst, int a) @@ -155,7 +195,7 @@ static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int a = dyn->insts[ninst].e.combined2; } else a = dyn->insts[ninst].e.combined1; - int i = extcache_get_st_f_noback(dyn, ninst, a); + int i = extcache_get_st_f_i64_noback(dyn, ninst, a); //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop); if(i>=0) { dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; @@ -174,7 +214,7 @@ static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int return; while(ninst>=0) { a+=dyn->insts[ninst].e.stack_pop; // adjust Stack depth: add pop'd ST (going backward) - int i = extcache_get_st_f(dyn, ninst, a); + int i = extcache_get_st_f_i64(dyn, ninst, a); //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_internal, ninst=%d, a=%d st=%d:%d, i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, i); if(i<0) return; dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; @@ -209,7 +249,7 @@ static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int else if (a==dyn->insts[ninst].e.combined2) a = dyn->insts[ninst].e.combined1; } - int i = extcache_get_st_f_noback(dyn, ninst, a); + int i = extcache_get_st_f_i64_noback(dyn, ninst, a); //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_forward, ninst=%d, a=%d st=%d:%d(%d/%d), i=%d\n", ninst, a, dyn->insts[ninst].e.stack, dyn->insts[ninst].e.stack_next, dyn->insts[ninst].e.stack_push, 
-dyn->insts[ninst].e.stack_pop, i); if(i<0) return; dyn->insts[ninst].e.extcache[i].t = EXT_CACHE_ST_D; @@ -230,7 +270,7 @@ static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a) { - int i = extcache_get_current_st_f(dyn, a); + int i = extcache_get_current_st_f_i64(dyn, a); //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double, ninst=%d a=%d st=%d i=%d\n", ninst, a, dyn->e.stack, i); if(i<0) return; dyn->e.extcache[i].t = EXT_CACHE_ST_D; @@ -266,19 +306,21 @@ int extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b) return EXT_CACHE_ST_D; } -static int isCacheEmpty(dynarec_native_t* dyn, int ninst) { - if(dyn->insts[ninst].e.stack_next) { +static int isCacheEmpty(dynarec_native_t* dyn, int ninst) +{ + if (dyn->insts[ninst].e.stack_next) { return 0; } - for(int i=0; i<24; ++i) - if(dyn->insts[ninst].e.extcache[i].v) { // there is something at ninst for i - if(!( - (dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D) - && dyn->insts[ninst].e.extcache[i].n<dyn->insts[ninst].e.stack_pop)) + for (int i = 0; i < 24; ++i) + if (dyn->insts[ninst].e.extcache[i].v) { // there is something at ninst for i + if (!( + (dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64) + && dyn->insts[ninst].e.extcache[i].n < dyn->insts[ninst].e.stack_pop)) return 0; } return 1; - } int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) { @@ -295,9 +337,11 @@ int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) { } for(int i=0; i<24 && !ret; ++i) if(dyn->insts[ninst].e.extcache[i].v) { // there is something at ninst for i - if(!( - (dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_F || dyn->insts[ninst].e.extcache[i].t==EXT_CACHE_ST_D) - && 
dyn->insts[ninst].e.extcache[i].n<dyn->insts[ninst].e.stack_pop)) + if (!( + (dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_F + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_D + || dyn->insts[ninst].e.extcache[i].t == EXT_CACHE_ST_I64) + && dyn->insts[ninst].e.extcache[i].n < dyn->insts[ninst].e.stack_pop)) ret = 1; } return ret; @@ -329,7 +373,9 @@ void extcacheUnwind(extcache_t* cache) int a = -1; int b = -1; for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j) - if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) { + if ((cache->extcache[j].t == EXT_CACHE_ST_D + || cache->extcache[j].t == EXT_CACHE_ST_F + || cache->extcache[j].t == EXT_CACHE_ST_I64)) { if(cache->extcache[j].n == cache->combined1) a = j; else if(cache->extcache[j].n == cache->combined2) @@ -362,7 +408,9 @@ void extcacheUnwind(extcache_t* cache) if(cache->stack_push) { // unpush for(int j=0; j<24; ++j) { - if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) { + if ((cache->extcache[j].t == EXT_CACHE_ST_D + || cache->extcache[j].t == EXT_CACHE_ST_F + || cache->extcache[j].t == EXT_CACHE_ST_I64)) { if(cache->extcache[j].n<cache->stack_push) cache->extcache[j].v = 0; else @@ -411,6 +459,7 @@ void extcacheUnwind(extcache_t* cache) break; case EXT_CACHE_ST_F: case EXT_CACHE_ST_D: + case EXT_CACHE_ST_I64: cache->x87cache[x87reg] = cache->extcache[i].n; cache->x87reg[x87reg] = EXTREG(i); ++x87reg; @@ -482,6 +531,7 @@ const char* getCacheName(int t, int n) switch(t) { case EXT_CACHE_ST_D: sprintf(buff, "ST%d", n); break; case EXT_CACHE_ST_F: sprintf(buff, "st%d", n); break; + case EXT_CACHE_ST_I64: sprintf(buff, "STi%d", n); break; case EXT_CACHE_MM: sprintf(buff, "MM%d", n); break; case EXT_CACHE_SS: sprintf(buff, "SS%d", n); break; case EXT_CACHE_SD: sprintf(buff, "SD%d", n); break; @@ -533,6 +583,7 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r switch(dyn->insts[ninst].e.extcache[ii].t) { 
case EXT_CACHE_ST_D: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_ST_F: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; + case EXT_CACHE_ST_I64: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_MM: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_SS: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_SD: dynarec_log(LOG_NONE, " %s:%s", fnames[EXTREG(ii)], getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h index 451336bd..b265f694 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.h +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -28,14 +28,20 @@ void fpu_reset_reg(dynarec_rv64_t* dyn); int extcache_get_st(dynarec_rv64_t* dyn, int ninst, int a); // Get if STx is FLOAT or DOUBLE int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a); +// Get if STx is FLOAT or I64 +int extcache_get_st_f_i64(dynarec_rv64_t* dyn, int ninst, int a); // Get actual type for STx int extcache_get_current_st(dynarec_rv64_t* dyn, int ninst, int a); // Get actual STx is FLOAT or DOUBLE int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a); +// Get actual STx is FLOAT or I64 +int extcache_get_current_st_f_i64(dynarec_rv64_t* dyn, int a); // Back-propagate a change float->double void extcache_promote_double(dynarec_rv64_t* dyn, int ninst, int a); // Combine and propagate if needed (pass 1 only) int 
extcache_combine_st(dynarec_rv64_t* dyn, int ninst, int a, int b); // with stack current dyn->n_stack* +// Do not allow i64 type +int extcache_no_i64(dynarec_rv64_t* dyn, int ninst, int st, int a); // FPU Cache transformation (for loops) // Specific, need to be written par backend int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst); diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index d175fabb..19b4c316 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -825,7 +825,9 @@ static void x87_reset(dynarec_rv64_t* dyn) dyn->e.swapped = 0; dyn->e.barrier = 0; for(int i=0; i<24; ++i) - if(dyn->e.extcache[i].t == EXT_CACHE_ST_F || dyn->e.extcache[i].t == EXT_CACHE_ST_D) + if (dyn->e.extcache[i].t == EXT_CACHE_ST_F + || dyn->e.extcache[i].t == EXT_CACHE_ST_D + || dyn->e.extcache[i].t == EXT_CACHE_ST_I64) dyn->e.extcache[i].v = 0; } @@ -878,7 +880,9 @@ int x87_do_push(dynarec_rv64_t* dyn, int ninst, int s1, int t) dyn->e.stack_push+=1; // move all regs in cache, and find a free one for(int j=0; j<24; ++j) - if((dyn->e.extcache[j].t == EXT_CACHE_ST_D) || (dyn->e.extcache[j].t == EXT_CACHE_ST_F)) + if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D) + || (dyn->e.extcache[j].t == EXT_CACHE_ST_F) + || (dyn->e.extcache[j].t == EXT_CACHE_ST_I64)) ++dyn->e.extcache[j].n; int ret = -1; for(int i=0; i<8; ++i) @@ -901,7 +905,9 @@ void x87_do_push_empty(dynarec_rv64_t* dyn, int ninst, int s1) dyn->e.stack_push+=1; // move all regs in cache for(int j=0; j<24; ++j) - if((dyn->e.extcache[j].t == EXT_CACHE_ST_D) || (dyn->e.extcache[j].t == EXT_CACHE_ST_F)) + if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D) + || (dyn->e.extcache[j].t == EXT_CACHE_ST_F) + || (dyn->e.extcache[j].t == EXT_CACHE_ST_I64)) ++dyn->e.extcache[j].n; for(int i=0; i<8; ++i) if(dyn->e.x87cache[i]!=-1) @@ -1080,9 +1086,13 @@ int x87_get_current_cache(dynarec_rv64_t* dyn, int ninst, int st, int t) for (int i=0; i<8; ++i) { 
if(dyn->e.x87cache[i]==st) { #if STEP == 1 - if(t==EXT_CACHE_ST_D && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t==EXT_CACHE_ST_F)) + if (t == EXT_CACHE_ST_D && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F || dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64)) extcache_promote_double(dyn, ninst, st); - #endif + else if (t == EXT_CACHE_ST_I64 && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F)) + extcache_promote_double(dyn, ninst, st); + else if (t == EXT_CACHE_ST_F && (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64)) + extcache_promote_double(dyn, ninst, st); +#endif return i; } assert(dyn->e.x87cache[i]<8); @@ -1122,8 +1132,10 @@ int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2, int x87_get_extcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) { for(int ii=0; ii<24; ++ii) - if((dyn->e.extcache[ii].t == EXT_CACHE_ST_F || dyn->e.extcache[ii].t == EXT_CACHE_ST_D) - && dyn->e.extcache[ii].n==st) + if ((dyn->e.extcache[ii].t == EXT_CACHE_ST_F + || dyn->e.extcache[ii].t == EXT_CACHE_ST_D + || dyn->e.extcache[ii].t == EXT_CACHE_ST_I64) + && dyn->e.extcache[ii].n == st) return ii; assert(0); return -1; @@ -1161,6 +1173,10 @@ void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) { FCVTDS(SCRATCH0, reg); FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); + } else if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_I64) { + FMVXD(s2, reg); + FCVTDL(SCRATCH0, s2, RD_RTZ); + FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); } else { FSD(reg, s1, offsetof(x64emu_t, x87)); } @@ -1179,7 +1195,8 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) MESSAGE(LOG_DUMP, "\tForget x87 Cache for ST%d\n", st); const int reg = dyn->e.x87reg[ret]; #if STEP == 1 - if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) + if (dyn->e.extcache[EXTIDX(dyn->e.x87reg[ret])].t == EXT_CACHE_ST_F + || 
dyn->e.extcache[EXTIDX(dyn->e.x87reg[ret])].t == EXT_CACHE_ST_I64) extcache_promote_double(dyn, ninst, st); #endif // prepare offset to fpu => s1 @@ -1195,6 +1212,10 @@ void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_F) { FCVTDS(SCRATCH0, reg); FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); + } else if (dyn->e.extcache[EXTIDX(reg)].t == EXT_CACHE_ST_I64) { + FMVXD(s2, reg); + FCVTDL(SCRATCH0, s2, RD_RTZ); + FSD(SCRATCH0, s1, offsetof(x64emu_t, x87)); } else { FSD(reg, s1, offsetof(x64emu_t, x87)); } @@ -1216,7 +1237,8 @@ void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st) // refresh the value MESSAGE(LOG_DUMP, "\tRefresh x87 Cache for ST%d\n", st); #if STEP == 1 - if(dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t==EXT_CACHE_ST_F) + if (dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_F + || dyn->e.extcache[EXTIDX(dyn->e.x87reg[i])].t == EXT_CACHE_ST_I64) extcache_promote_double(dyn, ninst, st); #endif LW(s2, xEmu, offsetof(x64emu_t, top)); @@ -1634,18 +1656,29 @@ void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, in static int findCacheSlot(dynarec_rv64_t* dyn, int ninst, int t, int n, extcache_t* cache) { ext_cache_t f; - f.n = n; f.t = t; - for(int i=0; i<24; ++i) { - if(cache->extcache[i].v == f.v) + f.n = n; + f.t = t; + for (int i = 0; i < 24; ++i) { + if (cache->extcache[i].v == f.v) return i; - if(cache->extcache[i].n == n) { - switch(cache->extcache[i].t) { + if (cache->extcache[i].n == n) { + switch (cache->extcache[i].t) { case EXT_CACHE_ST_F: - if (t==EXT_CACHE_ST_D) + if (t == EXT_CACHE_ST_D) + return i; + if (t == EXT_CACHE_ST_I64) return i; break; case EXT_CACHE_ST_D: - if (t==EXT_CACHE_ST_F) + if (t == EXT_CACHE_ST_F) + return i; + if (t == EXT_CACHE_ST_I64) + return i; + break; + case EXT_CACHE_ST_I64: + if (t == EXT_CACHE_ST_F) + return i; + if (t == EXT_CACHE_ST_D) return i; break; } @@ -1703,7 +1736,7 @@ static void 
swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t * cache->extcache[j].v = tmp.v; } -static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t *cache, int i, int t, int n) +static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t* cache, int i, int t, int n) { int reg = EXTREG(i); if(cache->extcache[i].v) { @@ -1738,6 +1771,7 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int break; case EXT_CACHE_ST_D: case EXT_CACHE_ST_F: + case EXT_CACHE_ST_I64: MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); if((*s3_top) == 0xffff) { LW(s3, xEmu, offsetof(x64emu_t, top)); @@ -1755,6 +1789,10 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int if(t==EXT_CACHE_ST_F) { FCVTSD(reg, reg); } + if (t == EXT_CACHE_ST_I64) { + FCVTLD(s1, reg, RD_RTZ); + FMVDX(reg, s1); + } break; case EXT_CACHE_NONE: case EXT_CACHE_SCR: @@ -1784,6 +1822,7 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i break; case EXT_CACHE_ST_D: case EXT_CACHE_ST_F: + case EXT_CACHE_ST_I64: MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); if((*s3_top)==0xffff) { LW(s3, xEmu, offsetof(x64emu_t, top)); @@ -1797,9 +1836,13 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i *s3_top += a; if(rv64_zba) SH3ADD(s2, s3, xEmu); else {SLLI(s2, s3, 3); ADD(s2, xEmu, s2);} *s2_val = 0; - if(t==EXT_CACHE_ST_F) { + if (t == EXT_CACHE_ST_F) { FCVTDS(reg, reg); } + if (t == EXT_CACHE_ST_I64) { + FMVXD(s1, reg); + FCVTDL(reg, s1, RD_RTZ); + } FSD(reg, s2, offsetof(x64emu_t, x87)); break; case EXT_CACHE_NONE: @@ -1936,6 +1979,26 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.extcache[i].t, 
cache.extcache[i].n)); FCVTDS(EXTREG(i), EXTREG(i)); cache.extcache[i].t = EXT_CACHE_ST_D; + } else if (cache.extcache[i].t == EXT_CACHE_ST_D && cache_i2.extcache[i].t == EXT_CACHE_ST_I64) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); + FCVTLD(s1, EXTREG(i), RD_RTZ); + FMVDX(EXTREG(i), s1); + cache.extcache[i].t = EXT_CACHE_ST_I64; + } else if (cache.extcache[i].t == EXT_CACHE_ST_F && cache_i2.extcache[i].t == EXT_CACHE_ST_I64) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); + FCVTLS(s1, EXTREG(i), RD_RTZ); + FMVDX(EXTREG(i), s1); + cache.extcache[i].t = EXT_CACHE_ST_I64; /* FCVTLS + FMVDX leave raw int64 bits in the register: record the target type, as the D -> I64 branch does */ + } else if (cache.extcache[i].t == EXT_CACHE_ST_I64 && cache_i2.extcache[i].t == EXT_CACHE_ST_F) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); + FMVXD(s1, EXTREG(i)); + FCVTSL(EXTREG(i), s1, RD_RTZ); + cache.extcache[i].t = EXT_CACHE_ST_F; + } else if (cache.extcache[i].t == EXT_CACHE_ST_I64 && cache_i2.extcache[i].t == EXT_CACHE_ST_D) { + MESSAGE(LOG_DUMP, "\t - Convert %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); + FMVXD(s1, EXTREG(i)); + FCVTDL(EXTREG(i), s1, RD_RTZ); + cache.extcache[i].t = EXT_CACHE_ST_D; } } } @@ -2124,7 +2187,9 @@ void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst) { if(dyn->e.stack_pop) { for(int j=0; j<24; ++j) - if((dyn->e.extcache[j].t == EXT_CACHE_ST_D || dyn->e.extcache[j].t == EXT_CACHE_ST_F)) { + if ((dyn->e.extcache[j].t == EXT_CACHE_ST_D + || dyn->e.extcache[j].t == EXT_CACHE_ST_F + || dyn->e.extcache[j].t == EXT_CACHE_ST_I64)) { if(dyn->e.extcache[j].n<dyn->e.stack_pop) dyn->e.extcache[j].v = 0; else diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 140d90c1..3fb8e654 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -1338,16 +1338,19 @@ int extcache_st_coherency(dynarec_rv64_t* dyn,
int ninst, int a, int b); #if STEP == 0 #define ST_IS_F(A) 0 +#define ST_IS_I64(A) 0 #define X87_COMBINE(A, B) EXT_CACHE_ST_D #define X87_ST0 EXT_CACHE_ST_D #define X87_ST(A) EXT_CACHE_ST_D #elif STEP == 1 #define ST_IS_F(A) (extcache_get_current_st(dyn, ninst, A) == EXT_CACHE_ST_F) +#define ST_IS_I64(A) (extcache_get_current_st(dyn, ninst, A) == EXT_CACHE_ST_I64) #define X87_COMBINE(A, B) extcache_combine_st(dyn, ninst, A, B) -#define X87_ST0 extcache_get_current_st(dyn, ninst, 0) -#define X87_ST(A) extcache_get_current_st(dyn, ninst, A) +#define X87_ST0 extcache_no_i64(dyn, ninst, 0, extcache_get_current_st(dyn, ninst, 0)) +#define X87_ST(A) extcache_no_i64(dyn, ninst, A, extcache_get_current_st(dyn, ninst, A)) #else #define ST_IS_F(A) (extcache_get_st(dyn, ninst, A) == EXT_CACHE_ST_F) +#define ST_IS_I64(A) (extcache_get_st(dyn, ninst, A) == EXT_CACHE_ST_I64) #if STEP == 3 #define X87_COMBINE(A, B) extcache_st_coherency(dyn, ninst, A, B) #else diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 907de0a3..6cd32413 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -10,13 +10,14 @@ typedef struct instsize_s instsize_t; #define BARRIER_MAYBE 8 -#define EXT_CACHE_NONE 0 -#define EXT_CACHE_ST_D 1 -#define EXT_CACHE_ST_F 2 -#define EXT_CACHE_MM 3 -#define EXT_CACHE_SS 4 -#define EXT_CACHE_SD 5 -#define EXT_CACHE_SCR 6 +#define EXT_CACHE_NONE 0 +#define EXT_CACHE_ST_D 1 +#define EXT_CACHE_ST_F 2 +#define EXT_CACHE_ST_I64 3 +#define EXT_CACHE_MM 4 +#define EXT_CACHE_SS 5 +#define EXT_CACHE_SD 6 +#define EXT_CACHE_SCR 7 typedef union ext_cache_s { int8_t v; struct { |