diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 112 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.h | 9 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 6 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 2 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 83 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_functions.c | 28 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_functions.h | 7 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 3 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 5 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c | 41 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_functions.c | 6 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.c | 131 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 13 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 9 |
14 files changed, 131 insertions, 324 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 15c0f3f0..00c34afc 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -1148,6 +1148,7 @@ void updateNativeFlags(dynarec_native_t* dyn) void rasNativeState(dynarec_arm_t* dyn, int ninst) { dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].set_nat_flags = dyn->insts[ninst].use_nat_flags = dyn->insts[ninst].need_nat_flags = 0; + dyn->insts[ninst].ymm0_in = dyn->insts[ninst].ymm0_sub = dyn->insts[ninst].ymm0_add = dyn->insts[ninst].ymm0_out = dyn->insts[ninst].purge_ymm = 0; } int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst) @@ -1276,4 +1277,113 @@ void propagateFpuBarrier(dynarec_arm_t* dyn) last_fpu_used = -1; // reset the last_fpu_used... } } -} \ No newline at end of file +} + + +void updateYmm0s(dynarec_arm_t* dyn, int ninst, int max_ninst_reached) +{ + int can_incr = ninst == max_ninst_reached; // Are we the top-level call? + int ok = 1; + while ((can_incr || ok) && ninst < dyn->size) { + // if(dyn->need_dump) dynarec_log(LOG_NONE, "update ninst=%d (%d): can_incr=%d\n", ninst, max_ninst_reached, can_incr); + uint16_t new_purge_ymm, new_ymm0_in, new_ymm0_out; + + if (dyn->insts[ninst].pred_sz && dyn->insts[ninst].x64.alive) { + // The union of the empty set is empty (0), the intersection is the universe (-1) + // The first instruction is the entry point, which has a virtual pred with ymm0_out = 0 + // Similarly, float barriers reset ymm0s + uint16_t ymm0_union = 0; + uint16_t ymm0_inter = (ninst && !(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) ? ((uint16_t)-1) : (uint16_t)0; + for (int i = 0; i < dyn->insts[ninst].pred_sz; ++i) { + int pred = dyn->insts[ninst].pred[i]; + // if(dyn->need_dump) dynarec_log(LOG_NONE, "\twith pred[%d] = %d", i, pred); + if (pred >= max_ninst_reached) { + // if(dyn->need_dump) dynarec_log(LOG_NONE, " (skipped)\n"); + continue; + } + + int pred_out = dyn->insts[pred].x64.has_callret ? 0 : dyn->insts[pred].ymm0_out; + // if(dyn->need_dump) dynarec_log(LOG_NONE, " ~> %04X\n", pred_out); + ymm0_union |= pred_out; + ymm0_inter &= pred_out; + } + // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t=> %04X,%04X\n", ymm0_union, ymm0_inter); + // Notice the default values yield something coherent here (if all pred are after ninst) + new_purge_ymm = ymm0_union & ~ymm0_inter; + new_ymm0_in = ymm0_inter; + new_ymm0_out = (ymm0_inter | dyn->insts[ninst].ymm0_add) & ~dyn->insts[ninst].ymm0_sub; + + if ((dyn->insts[ninst].purge_ymm != new_purge_ymm) || (dyn->insts[ninst].ymm0_in != new_ymm0_in) || (dyn->insts[ninst].ymm0_out != new_ymm0_out)) { + // Need to update self and next(s) + dyn->insts[ninst].purge_ymm = new_purge_ymm; + dyn->insts[ninst].ymm0_in = new_ymm0_in; + dyn->insts[ninst].ymm0_out = new_ymm0_out; + + if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + } else { + // We need to stop here if the opcode has no "real" next or if we reached the ninst of the toplevel + ok = (max_ninst_reached - 1 != ninst) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].x64.has_callret; + } + + int jmp = (dyn->insts[ninst].x64.jmp) ? dyn->insts[ninst].x64.jmp_insts : -1; + if ((jmp != -1) && (jmp < max_ninst_reached)) { + // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); + // The jump goes before the last instruction reached, update the destination + // If this is the top level call, this means the jump goes backward (jmp != ninst) + // Otherwise, since we don't update all instructions, we may miss the update (don't use jmp < ninst) + updateYmm0s(dyn, jmp, max_ninst_reached); + } + } else { + if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + + // Also update jumps to before (they are skipped otherwise) + int jmp = (dyn->insts[ninst].x64.jmp) ? dyn->insts[ninst].x64.jmp_insts : -1; + if ((jmp != -1) && (jmp < max_ninst_reached)) { + // if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); + updateYmm0s(dyn, jmp, max_ninst_reached); + } + } else { + // We didn't update anything, we can leave + ok = 0; + } + } + } else if (can_incr) { + // We always have ninst == max_ninst_reached when can_incr == 1 + ++max_ninst_reached; + } else { + // We didn't update anything, we can leave + ok = 0; + } + ++ninst; + } +} + + +// AVX helpers +void avx_mark_zero(dynarec_arm_t* dyn, int ninst, int reg) +{ + dyn->ymm_zero |= (1<<reg); +} + +int is_avx_zero(dynarec_arm_t* dyn, int ninst, int reg) +{ + return (dyn->ymm_zero>>reg)&1; +} + +int is_avx_zero_unset(dynarec_arm_t* dyn, int ninst, int reg) +{ + if((dyn->ymm_zero>>reg)&1) { + dyn->ymm_zero &= ~(1<<reg); + return 1; + } + return 0; +} + +void avx_mark_zero_reset(dynarec_arm_t* dyn, int ninst) +{ + dyn->ymm_zero = 0; +} diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h index d32dbddd..a197eeec 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.h +++ b/src/dynarec/arm64/dynarec_arm64_functions.h @@ -90,4 +90,13 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st); void propagateFpuBarrier(dynarec_arm_t* dyn); // propage the uneeded flags on XMM/YMM regs (done between step 0 and step 1) void updateUneeded(dynarec_arm_t* dyn); + +void updateYmm0s(dynarec_arm_t* dyn, int ninst, int max_ninst_reached); + +// AVX helpers +void avx_mark_zero(dynarec_arm_t* dyn, int ninst, int reg); +int is_avx_zero(dynarec_arm_t* dyn, int ninst, int reg); +int is_avx_zero_unset(dynarec_arm_t* dyn, int ninst, int reg); +void avx_mark_zero_reset(dynarec_arm_t* dyn, int ninst); + #endif //__DYNAREC_ARM_FUNCTIONS_H__ diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 040031fc..8ba2407f 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -2002,6 +2002,10 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip } \ } -#define PURGE_YMM() avx_purge_ymm(dyn, ninst, dyn->insts[ninst+1].purge_ymm, x1) +#define PURGE_YMM() \ + do { \ + if ((ok > 0) && reset_n == -1 && dyn->insts[ninst + 1].purge_ymm) \ + avx_purge_ymm(dyn, ninst, dyn->insts[ninst + 1].purge_ymm, x1); \ + } while (0) #endif //__DYNAREC_ARM64_HELPER_H__ diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 994eebc3..761f8166 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -24,7 +24,7 @@ #define MAXBLOCK_SIZE ((1<<20)-200) #define RAZ_SPECIFIC(A, N) rasNativeState(A, N) -#define UPDATE_SPECIFICS(A) updateNativeFlags(A); propagateFpuBarrier(A) +#define UPDATE_SPECIFICS(A) updateYmm0s(dyn, 0, 0); updateNativeFlags(A); propagateFpuBarrier(A) #define PREUPDATE_SPECIFICS(A) #define POSTUPDATE_SPECIFICS(A) updateUneeded(A) #define ARCH_SIZE(A) get_size_arch(A) diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index c92f251f..17ad5a02 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -444,87 +444,6 @@ static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) { return ninst; } -static void updateYmm0s(dynarec_native_t* dyn, int ninst, int max_ninst_reached) { - int can_incr = ninst == max_ninst_reached; // Are we the top-level call? - int ok = 1; - while ((can_incr || ok) && ninst<dyn->size) { - //if(dyn->need_dump) dynarec_log(LOG_NONE, "update ninst=%d (%d): can_incr=%d\n", ninst, max_ninst_reached, can_incr); - uint16_t new_purge_ymm, new_ymm0_in, new_ymm0_out; - - if (dyn->insts[ninst].pred_sz && dyn->insts[ninst].x64.alive) { - // The union of the empty set is empty (0), the intersection is the universe (-1) - // The first instruction is the entry point, which has a virtual pred with ymm0_out = 0 - // Similarly, float barriers reset ymm0s - uint16_t ymm0_union = 0; - uint16_t ymm0_inter = (ninst && !(dyn->insts[ninst].x64.barrier & BARRIER_FLOAT)) ? ((uint16_t)-1) : (uint16_t)0; - for (int i = 0; i < dyn->insts[ninst].pred_sz; ++i) { - int pred = dyn->insts[ninst].pred[i]; - //if(dyn->need_dump) dynarec_log(LOG_NONE, "\twith pred[%d] = %d", i, pred); - if (pred >= max_ninst_reached) { - //if(dyn->need_dump) dynarec_log(LOG_NONE, " (skipped)\n"); - continue; - } - - int pred_out = dyn->insts[pred].x64.has_callret ? 0 : dyn->insts[pred].ymm0_out; - //if(dyn->need_dump) dynarec_log(LOG_NONE, " ~> %04X\n", pred_out); - ymm0_union |= pred_out; - ymm0_inter &= pred_out; - } - //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t=> %04X,%04X\n", ymm0_union, ymm0_inter); - // Notice the default values yield something coherent here (if all pred are after ninst) - new_purge_ymm = ymm0_union & ~ymm0_inter; - new_ymm0_in = ymm0_inter; - new_ymm0_out = (ymm0_inter | dyn->insts[ninst].ymm0_add) & ~dyn->insts[ninst].ymm0_sub; - - if ((dyn->insts[ninst].purge_ymm != new_purge_ymm) || (dyn->insts[ninst].ymm0_in != new_ymm0_in) || (dyn->insts[ninst].ymm0_out != new_ymm0_out)) { - // Need to update self and next(s) - dyn->insts[ninst].purge_ymm = new_purge_ymm; - dyn->insts[ninst].ymm0_in = new_ymm0_in; - dyn->insts[ninst].ymm0_out = new_ymm0_out; - - if (can_incr) { - // We always have ninst == max_ninst_reached when can_incr == 1 - ++max_ninst_reached; - } else { - // We need to stop here if the opcode has no "real" next or if we reached the ninst of the toplevel - ok = (max_ninst_reached - 1 != ninst) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].x64.has_callret; - } - - int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; - if((jmp!=-1) && (jmp < max_ninst_reached)) { - //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); - // The jump goes before the last instruction reached, update the destination - // If this is the top level call, this means the jump goes backward (jmp != ninst) - // Otherwise, since we don't update all instructions, we may miss the update (don't use jmp < ninst) - updateYmm0s(dyn, jmp, max_ninst_reached); - } - } else { - if (can_incr) { - // We always have ninst == max_ninst_reached when can_incr == 1 - ++max_ninst_reached; - - // Also update jumps to before (they are skipped otherwise) - int jmp = (dyn->insts[ninst].x64.jmp)?dyn->insts[ninst].x64.jmp_insts:-1; - if((jmp!=-1) && (jmp < max_ninst_reached)) { - //if(dyn->need_dump) dynarec_log(LOG_NONE, "\t! jump to %d\n", jmp); - updateYmm0s(dyn, jmp, max_ninst_reached); - } - } else { - // We didn't update anything, we can leave - ok = 0; - } - } - } else if (can_incr) { - // We always have ninst == max_ninst_reached when can_incr == 1 - ++max_ninst_reached; - } else { - // We didn't update anything, we can leave - ok = 0; - } - ++ninst; - } -} - void* current_helper = NULL; static int static_jmps[MAX_INSTS+2]; static uintptr_t static_next[MAX_INSTS+2]; @@ -779,7 +698,6 @@ dynablock_t* FillBlock64(uintptr_t addr, int alternate, int is32bits, int inst_m int ii = i; while(ii<helper.size && !helper.insts[ii].pred_sz) { fpu_reset_ninst(&helper, ii); - helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0; RAZ_SPECIFIC(&helper, ii); ++ii; } @@ -796,7 +714,6 @@ dynablock_t* FillBlock64(uintptr_t addr, int alternate, int is32bits, int inst_m CancelBlock64(0); return CreateEmptyBlock(addr, is32bits, is_new); } - updateYmm0s(&helper, 0, 0); UPDATE_SPECIFICS(&helper); // check for still valid close loop for(int ii=0; ii<helper.jmp_sz && !helper.always_test; ++ii) { diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c index bb696ac2..79e558d6 100644 --- a/src/dynarec/dynarec_native_functions.c +++ b/src/dynarec/dynarec_native_functions.c @@ -620,34 +620,6 @@ uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto } #undef F8 -// AVX -void avx_mark_zero(dynarec_native_t* dyn, int ninst, int reg) -{ - dyn->ymm_zero |= (1<<reg); -} - -int is_avx_zero(dynarec_native_t* dyn, int ninst, int reg) -{ - return (dyn->ymm_zero>>reg)&1; -} -int is_avx_zero_unset(dynarec_native_t* dyn, int ninst, int reg) -{ - if((dyn->ymm_zero>>reg)&1) { - dyn->ymm_zero &= ~(1<<reg); - return 1; - } - return 0; -} -void avx_mark_zero_reset(dynarec_native_t* dyn, int ninst) -{ - dyn->ymm_zero = 0; -} - -void avx_unmark_zero(dynarec_native_t* dyn, int ninst, int reg) -{ - dyn->ymm_zero &= ~(1<<reg); -} - void propagate_nodf(dynarec_native_t* dyn, int ninst) { while(ninst>=0) { diff --git a/src/dynarec/dynarec_native_functions.h b/src/dynarec/dynarec_native_functions.h index eca8568f..9601fc18 100644 --- a/src/dynarec/dynarec_native_functions.h +++ b/src/dynarec/dynarec_native_functions.h @@ -74,13 +74,6 @@ uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto // return Ib on a mod/rm opcode without emitting anything uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); -// AVX utilities -void avx_mark_zero(dynarec_native_t* dyn, int ninst, int reg); -int is_avx_zero(dynarec_native_t* dyn, int ninst, int reg); -int is_avx_zero_unset(dynarec_native_t* dyn, int ninst, int reg); -void avx_mark_zero_reset(dynarec_native_t* dyn, int ninst); -void avx_unmark_zero(dynarec_native_t* dyn, int ninst, int reg); - typedef struct register_mapping_s { const char* name; const char* native; diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index cb7da3fb..b88cfb41 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -346,8 +346,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int } if((ok>0) && dyn->insts[ninst].x64.has_callret) reset_n = -2; - if((ok>0) && reset_n==-1 && dyn->insts[ninst+1].purge_ymm) - PURGE_YMM(); + PURGE_YMM(); ++ninst; #if STEP == 0 memset(&dyn->insts[ninst], 0, sizeof(instruction_native_t)); diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index a71557cc..f5cf5f29 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -100,11 +100,6 @@ typedef struct instruction_la64_s { int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; - uint16_t purge_ymm; // need to purge some ymm - uint16_t ymm0_in; // bitmap of ymm to zero at purge - uint16_t ymm0_add; // the ymm0 added by the opcode - uint16_t ymm0_sub; // the ymm0 removed by the opcode - uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; uint8_t barrier_maybe; uint8_t will_write:2; // [strongmem] will write to memory diff --git a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c index 6bbc746d..b45f7abb 100644 --- a/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_avx_f3_0f.c @@ -46,47 +46,6 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, rex_t rex = vex.rex; switch (opcode) { - case 0x10: - INST_NAME("VMOVSS Gx, [Vx,] Ex"); - nextop = F8; - GETG; - if (MODREG) { - if (gd == vex.v) { - v0 = sse_get_reg(dyn, ninst, x1, gd, 1); - q0 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1); - FMVS(v0, q0); - } else { - GETGX(); - GETVX(); - GETEX(x2, 0, 1); - if (cpuext.xtheadmempair) { - ADD(x1, vback, vxoffset); - TH_LDD(x3, x4, x1, 0); - } else { - LD(x3, vback, vxoffset); - LD(x4, vback, vxoffset + 8); - } - LWU(x5, wback, fixedaddress); - if (cpuext.xtheadmempair) { - ADDI(x1, gback, gdoffset); - TH_SDD(x3, x4, x1, 0); - } else { - SD(x3, gback, gdoffset); - SD(x4, gback, gdoffset + 8); - } - SW(x5, gback, gdoffset); - } - } else { - v0 = sse_get_reg_empty(dyn, ninst, x1, gd, 1); - SMREAD(); - addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0); - FLW(v0, ed, fixedaddress); - // reset upper part - SW(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 4); - SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd]) + 8); - } - YMM0(gd); - break; default: DEFAULT; } diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c index e86e0a0c..86f34710 100644 --- a/src/dynarec/rv64/dynarec_rv64_functions.c +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -521,8 +521,6 @@ void extcacheUnwind(extcache_t* cache) break; case EXT_CACHE_XMMR: case EXT_CACHE_XMMW: - case EXT_CACHE_YMMR: - case EXT_CACHE_YMMW: cache->ssecache[cache->extcache[i].n].reg = EXTREG(i); cache->ssecache[cache->extcache[i].n].vector = 1; cache->ssecache[cache->extcache[i].n].write = (cache->extcache[i].t == EXT_CACHE_XMMW) ? 1 : 0; @@ -612,8 +610,6 @@ const char* getCacheName(int t, int n) case EXT_CACHE_SCR: sprintf(buff, "Scratch"); break; case EXT_CACHE_XMMW: sprintf(buff, "XMM%d", n); break; case EXT_CACHE_XMMR: sprintf(buff, "xmm%d", n); break; - case EXT_CACHE_YMMW: sprintf(buff, "YMM%d", n); break; - case EXT_CACHE_YMMR: sprintf(buff, "ymm%d", n); break; case EXT_CACHE_NONE: buff[0] = '\0'; break; } return buff; @@ -734,8 +730,6 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r case EXT_CACHE_SD: length += sprintf(buf + length, " f%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_XMMR: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_XMMW: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; - case EXT_CACHE_YMMW: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; - case EXT_CACHE_YMMR: length += sprintf(buf + length, " v%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_SCR: length += sprintf(buf + length, " f%d:%s", EXTREG(ii), getCacheName(dyn->insts[ninst].e.extcache[ii].t, dyn->insts[ninst].e.extcache[ii].n)); break; case EXT_CACHE_NONE: default: break; diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c index 13f87c85..17f02a17 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.c +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -2061,25 +2061,6 @@ static void sse_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) } } - // AVX - if (dyn->ymm_zero) { - if (old == -1) { - MESSAGE(LOG_DUMP, "\tPurge %sSSE Cache ------\n", next ? "locally " : ""); - ++old; - } - for (int i = 0; i < 16; ++i) - if (is_avx_zero(dyn, ninst, i)) { - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8); - } - } - if (!next) - avx_mark_zero_reset(dyn, ninst); - } if (old != -1) { MESSAGE(LOG_DUMP, "\t------ Purge SSE Cache\n"); } @@ -2098,32 +2079,10 @@ static void sse_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1) else FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); } - - // AVX - if (dyn->ymm_zero) - for (int i = 0; i < 16; ++i) - if (is_avx_zero(dyn, ninst, i)) { - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8); - } - } } void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) { - if (is_avx_zero(dyn, ninst, a)) { - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[a])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[a])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[a]) + 8); - } - } if (dyn->e.ssecache[a].v == -1) return; if (dyn->e.ssecache[a].vector) { @@ -2136,14 +2095,6 @@ void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a) FSD(dyn->e.ssecache[a].reg, xEmu, offsetof(x64emu_t, xmm[a])); } -void ymm_mark_zero(dynarec_rv64_t* dyn, int ninst, int a) -{ -#if STEP == 0 - dyn->insts[ninst].ymm0_add |= (1 << a); -#endif - avx_mark_zero(dyn, ninst, a); -} - void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) { // for float registers, we might lost f0..f7, f10..f17 and f28..f31, that means @@ -2162,15 +2113,6 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) FSW(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); else FSD(dyn->e.ssecache[i].reg, xEmu, offsetof(x64emu_t, xmm[i])); - if (is_avx_zero(dyn, ninst, i)) { - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8); - } - } } MESSAGE(LOG_DUMP, "\t------- Push (float) XMM Cache (%d)\n", n); } @@ -2210,15 +2152,6 @@ void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) ADDI(s1, xEmu, offsetof(x64emu_t, xmm[i])); VSE_V(dyn->e.ssecache[i].reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); } - if (is_avx_zero(dyn, ninst, i)) { - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8); - } - } } MESSAGE(LOG_DUMP, "\t------- Push (vector) XMM Cache (%d)\n", n); } @@ -2330,10 +2263,6 @@ int fpu_needpurgecache(dynarec_rv64_t* dyn, int ninst) // sse for (int i = 0; i < 16; ++i) if (dyn->e.ssecache[i].v != -1) return 1; - // avx - if (dyn->ymm_zero) - for (int i = 0; i < 16; ++i) - if (is_avx_zero(dyn, ninst, i)) return 1; return 0; } @@ -2372,13 +2301,6 @@ static int findCacheSlot(dynarec_rv64_t* dyn, int ninst, int t, int n, extcache_ case EXT_CACHE_XMMW: if (t == EXT_CACHE_XMMR) return i; - case EXT_CACHE_YMMR: - if (t == EXT_CACHE_YMMW) - return i; - break; - case EXT_CACHE_YMMW: - if (t == EXT_CACHE_YMMR) - return i; break; } } @@ -2391,9 +2313,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t* if (i == j) return; if (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW - || cache->extcache[j].t == EXT_CACHE_XMMR || cache->extcache[j].t == EXT_CACHE_XMMW - || cache->extcache[i].t == EXT_CACHE_YMMR || cache->extcache[i].t == EXT_CACHE_YMMW - || cache->extcache[j].t == EXT_CACHE_YMMR || cache->extcache[j].t == EXT_CACHE_YMMW) { + || cache->extcache[j].t == EXT_CACHE_XMMR || cache->extcache[j].t == EXT_CACHE_XMMW) { int reg_i = EXTREG(i); int reg_j = EXTREG(j); if (!cache->extcache[i].v) { @@ -2451,7 +2371,7 @@ static void swapCache(dynarec_rv64_t* dyn, int ninst, int i, int j, extcache_t* static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int s2, int s3, int* s1_val, int* s2_val, int* s3_top, extcache_t* cache, int i, int t, int n) { int reg = EXTREG(i); - if (cache->extcache[i].v && (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW || cache->extcache[i].t == EXT_CACHE_YMMR || cache->extcache[i].t == EXT_CACHE_YMMW)) { + if (cache->extcache[i].v && (cache->extcache[i].t == EXT_CACHE_XMMR || cache->extcache[i].t == EXT_CACHE_XMMW)) { int j = i + 1; while (cache->extcache[j].v) ++j; @@ -2483,13 +2403,6 @@ static void loadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, int ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n])); VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); break; - case EXT_CACHE_YMMR: - case EXT_CACHE_YMMW: - MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); - SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[n])); - VLE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); - break; case EXT_CACHE_SS: MESSAGE(LOG_DUMP, "\t - Loading %s\n", getCacheName(t, n)); FLW(reg, xEmu, offsetof(x64emu_t, xmm[n])); @@ -2552,7 +2465,6 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i int reg = EXTREG(i); switch (t) { case EXT_CACHE_XMMR: - case EXT_CACHE_YMMR: MESSAGE(LOG_DUMP, "\t - ignoring %s\n", getCacheName(t, n)); break; case EXT_CACHE_XMMW: @@ -2561,12 +2473,6 @@ static void unloadCache(dynarec_rv64_t* dyn, int ninst, int stack_cnt, int s1, i ADDI(s1, xEmu, offsetof(x64emu_t, xmm[n])); VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); break; - case EXT_CACHE_YMMW: - MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); - SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[n])); - VSE_V(reg, s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); - break; case EXT_CACHE_SS: MESSAGE(LOG_DUMP, "\t - Unloading %s\n", getCacheName(t, n)); FSW(reg, xEmu, offsetof(x64emu_t, xmm[n])); @@ -2733,8 +2639,6 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in cache.extcache[i].t = EXT_CACHE_ST_D; } else if (cache.extcache[i].t == EXT_CACHE_XMMR && cache_i2.extcache[i].t == EXT_CACHE_XMMW) { cache.extcache[i].t = EXT_CACHE_XMMW; - } else if (cache.extcache[i].t == EXT_CACHE_YMMR && cache_i2.extcache[i].t == EXT_CACHE_YMMW) { - cache.extcache[i].t = EXT_CACHE_YMMW; } else if (cache.extcache[i].t == EXT_CACHE_XMMW && cache_i2.extcache[i].t == EXT_CACHE_XMMR) { // refresh cache... MESSAGE(LOG_DUMP, "\t - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); @@ -2742,13 +2646,6 @@ static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, in ADDI(s1, xEmu, offsetof(x64emu_t, xmm[cache.extcache[i].n])); VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); cache.extcache[i].t = EXT_CACHE_XMMR; - } else if (cache.extcache[i].t == EXT_CACHE_YMMW && cache_i2.extcache[i].t == EXT_CACHE_YMMR) { - // refresh cache... - MESSAGE(LOG_DUMP, "\t - Refreh %s\n", getCacheName(cache.extcache[i].t, cache.extcache[i].n)); - SET_ELEMENT_WIDTH(s1, VECTOR_SEWANY, 0); - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[cache.extcache[i].n])); - VSE_V(EXTREG(i), s1, dyn->vector_eew, VECTOR_UNMASKED, VECTOR_NFIELD1); - cache.extcache[i].t = EXT_CACHE_YMMR; } } } @@ -3256,27 +3153,3 @@ void vector_loadmask(dynarec_rv64_t* dyn, int ninst, int vreg, uint64_t imm, int } #endif } - - -void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1) -{ - int do_something = 0; - for (int i = 0; i < 16; ++i) - if (mask & (1 << i)) { - if (is_avx_zero_unset(dyn, ninst, i)) { - if (!do_something) { - MESSAGE(LOG_NONE, "Purge YMM mask=%04x --------\n", mask); - do_something = 1; - } - if (cpuext.xtheadmempair) { - ADDI(s1, xEmu, offsetof(x64emu_t, ymm[i])); - TH_SDD(xZR, xZR, s1, 0); - } else { - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i])); - SD(xZR, xEmu, offsetof(x64emu_t, ymm[i]) + 8); - } - } - } - if (do_something) - MESSAGE(LOG_NONE, "---------- Purge YMM\n"); -} diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 0d3a976e..e665d420 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -397,8 +397,6 @@ #define GB_EQ_EB() (MODREG && ((nextop & 0x38) >> 3) == (nextop & 7) && (rex.r == rex.b)) -#define YMM0(a) ymm_mark_zero(dyn, ninst, a); - // Get direction with size Z and based of F_DF flag, on register r ready for load/store fetching // using s as scratch. #define GETDIR(r, s, Z) \ @@ -1408,8 +1406,6 @@ #define sse_purge07cache STEPNAME(sse_purge07cache) #define sse_reflect_reg STEPNAME(sse_reflect_reg) -#define ymm_mark_zero STEPNAME(ymm_mark_zero) - #define mmx_get_reg_vector STEPNAME(mmx_get_reg_vector) #define mmx_get_reg_empty_vector STEPNAME(mmx_get_reg_empty_vector) #define sse_get_reg_empty_vector STEPNAME(sse_get_reg_empty_vector) @@ -1429,7 +1425,6 @@ #define fpu_unreflectcache STEPNAME(fpu_unreflectcache) #define x87_reflectcount STEPNAME(x87_reflectcount) #define x87_unreflectcount STEPNAME(x87_unreflectcount) -#define avx_purge_ymm STEPNAME(avx_purge_ymm) #define CacheTransform STEPNAME(CacheTransform) #define rv64_move64 STEPNAME(rv64_move64) @@ -1592,9 +1587,6 @@ void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1); // Set rounding according to mxcsr flags, return reg to restore flags int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2); -// purge ymm_zero mask according to purge_ymm -void avx_purge_ymm(dynarec_rv64_t* dyn, int ninst, uint16_t mask, int s1); - void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); @@ -1667,9 +1659,6 @@ void sse_purge07cache(dynarec_rv64_t* dyn, int ninst, int s1); // Push current value to the cache void sse_reflect_reg(dynarec_rv64_t* dyn, int ninst, int s1, int a); -// mark an ymm upper part has zero (forgetting upper part if needed) -void ymm_mark_zero(dynarec_rv64_t* dyn, int ninst, int a); - // common coproc helpers // reset the cache with n void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n); @@ -1931,7 +1920,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, #define FCOMIS(v1, v2, s1, s2, s3, s4, s5) FCOMI(S, v1, v2, s1, s2, s3, s4, s5) #define FCOMID(v1, v2, s1, s2, s3, s4, s5) FCOMI(D, v1, v2, s1, s2, s3, s4, s5) -#define PURGE_YMM() avx_purge_ymm(dyn, ninst, dyn->insts[ninst + 1].purge_ymm, x1) +#define PURGE_YMM() // reg = (reg < -32768) ? -32768 : ((reg > 32767) ? 32767 : reg) #define SAT16(reg, s) \ diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index d50bafab..de7ba7be 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -21,9 +21,7 @@ typedef struct box64env_s box64env_t; #define EXT_CACHE_SCR 7 #define EXT_CACHE_XMMW 8 #define EXT_CACHE_XMMR 9 -#define EXT_CACHE_YMMW 10 -#define EXT_CACHE_YMMR 11 -#define EXT_CACHE_MMV 12 +#define EXT_CACHE_MMV 10 #define EXT_CACHE_OLD_SD 0 #define EXT_CACHE_OLD_SS 1 @@ -116,11 +114,6 @@ typedef struct instruction_rv64_s { int pass2choice;// value for choices that are fixed on pass2 for pass3 uintptr_t natcall; uint16_t retn; - uint16_t purge_ymm; // need to purge some ymm - uint16_t ymm0_in; // bitmap of ymm to zero at purge - uint16_t ymm0_add; // the ymm0 added by the opcode - uint16_t ymm0_sub; // the ymm0 removed by the opcode - uint16_t ymm0_out; // the ymm0 at th end of the opcode uint16_t ymm0_pass2, ymm0_pass3; int barrier_maybe; uint8_t will_write:2; // [strongmem] will write to memory |