diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-07-02 10:34:37 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-07-02 10:34:37 +0200 |
| commit | 4b0b3fc98ae4a1e848765e0cd48f958a13fc683d (patch) | |
| tree | 8f6be5a089600eb79972f340f01491189e512ec1 /src | |
| parent | c6afd44c1c91485c278376622871975d6d20f176 (diff) | |
| download | box64-4b0b3fc98ae4a1e848765e0cd48f958a13fc683d.tar.gz box64-4b0b3fc98ae4a1e848765e0cd48f958a13fc683d.zip | |
[DYNAREC] Improved cache coherency and internal jump handling, [ARM64_DYNAREC] Improved YMM register tracking
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 30 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 4 |
| -rw-r--r-- | src/dynarec/dynarec_native.c | 16 |
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 4 |
5 files changed, 52 insertions, 4 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index 607b8a93..ea60745c 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -61,6 +61,9 @@ void fpu_reset_scratch(dynarec_arm_t* dyn) { dyn->n.fpu_scratch = 0; dyn->n.ymm_used = 0; + dyn->n.ymm_regs = 0; + dyn->n.ymm_write = 0; + dyn->n.ymm_removed = 0; } // Get a x87 double reg int fpu_get_reg_x87(dynarec_arm_t* dyn, int ninst, int t, int n) @@ -83,6 +86,15 @@ void fpu_free_reg(dynarec_arm_t* dyn, int reg) { // TODO: check upper limit? dyn->n.fpuused[reg] = 0; + if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMR || dyn->n.neoncache[reg].t==NEON_CACHE_YMMW) { + dyn->n.ymm_removed |= 1<<dyn->n.neoncache[reg].n; + if(dyn->n.neoncache[reg].t==NEON_CACHE_YMMW) + dyn->n.ymm_write |= 1<<dyn->n.neoncache[reg].n; + if(reg>SCRATCH0) + dyn->n.ymm_regs |= (8LL+reg-SCRATCH0)<<(dyn->n.neoncache[reg].n*4); + else + dyn->n.ymm_regs |= ((uint64_t)(reg-EMM0))<<(dyn->n.neoncache[reg].n*4); + } if(dyn->n.neoncache[reg].t!=NEON_CACHE_ST_F && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_D && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_I64) dyn->n.neoncache[reg].v = 0; if(dyn->n.fpu_scratch && reg==SCRATCH0+dyn->n.fpu_scratch-1) @@ -560,6 +572,24 @@ void neoncacheUnwind(neoncache_t* cache) cache->fpuused[i] = 0; } } + // add back removed YMM + if(cache->ymm_removed) { + for(int i=0; i<16; ++i) + if(cache->ymm_removed&(1<<i)) { + int reg = cache->ymm_regs>>(i*4)&15; + if(reg>7) + reg = reg - 8 + SCRATCH0; + else + reg = reg + EMM0; + if(cache->neoncache[reg].v) + printf_log(LOG_INFO, "Warning, recreating YMM%d on non empty slot %s", i, getCacheName(cache->neoncache[reg].t, cache->neoncache[reg].n)); + cache->neoncache[reg].t = (cache->ymm_write&(1<<i))?NEON_CACHE_YMMW:NEON_CACHE_YMMR; + cache->neoncache[reg].n = i; + } + cache->ymm_regs = 0; + cache->ymm_write = cache->ymm_removed = 0; + } + cache->ymm_used = 0; } #define F8 *(uint8_t*)(addr++) 
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index b333e5b9..32de5146 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -2481,7 +2481,7 @@ void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n) dyn->ymm_zero = dyn->insts[reset_n].ymm0_out; #endif #if STEP == 0 - if(box64_dynarec_dump) dynarec_log(LOG_NONE, "New x87stack=%d\n", dyn->n.x87stack); + if(box64_dynarec_dump && dyn->n.x87stack) dynarec_log(LOG_NONE, "New x87stack=%d at ResetCache in inst %d with %d\n", dyn->n.x87stack, ninst, reset_n); #endif #if defined(HAVE_TRACE) && (STEP>2) if(box64_dynarec_dump && 0) //disable for now, need more work diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index b86f1b11..c1015abd 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -59,6 +59,9 @@ typedef struct neoncache_s { int8_t fpu_scratch; // scratch counter int8_t fpu_reg; // x87/sse/mmx reg counter uint16_t ymm_used; // mask of the ymm regs used in this opcode + uint64_t ymm_regs; // 4bits (0-15) position of 16 ymmXX regs removed + uint16_t ymm_write; // 1bits of ymmXX removed write + uint16_t ymm_removed; // 1bits if ymmXX was removed } neoncache_t; typedef struct flagcache_s { @@ -140,6 +143,7 @@ void add_next(dynarec_arm_t *dyn, uintptr_t addr); uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); void add_jump(dynarec_arm_t *dyn, int ninst); int get_first_jump(dynarec_arm_t *dyn, int next); +int get_first_jump_addr(dynarec_arm_t *dyn, uintptr_t next); int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 0bfa80ee..15ecdce8 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -101,6 +101,11 @@ void 
add_jump(dynarec_native_t *dyn, int ninst) { dyn->jmps[dyn->jmp_sz++] = ninst; } int get_first_jump(dynarec_native_t *dyn, int next) { + if(next<0 || next>dyn->size) + return -2; + return get_first_jump_addr(dyn, dyn->insts[next].x64.addr); +} +int get_first_jump_addr(dynarec_native_t *dyn, uintptr_t next) { for(int i=0; i<dyn->jmp_sz; ++i) if(dyn->insts[dyn->jmps[i]].x64.jmp == next) return dyn->jmps[i]; @@ -612,6 +617,17 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit } i = ii; } + // remove trailling dead code + while(helper.size && !helper.insts[helper.size-1].x64.alive) { + helper.isize-=helper.insts[helper.size-1].x64.size; + --helper.size; + } + if(!helper.size) { + // NULL block after removing dead code, how is that possible? + dynarec_log(LOG_INFO, "Warning, null-sized dynarec block after trimming dead code (%p)\n", (void*)addr); + CancelBlock64(0); + return CreateEmptyBlock(block, addr); + } pos = 0; while(pos<helper.size) pos = updateYmm0(&helper, pos); diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index 7a885c76..2ebc89cc 100644 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -171,8 +171,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int fpu_reset_scratch(dyn); int next = ninst+1; #if STEP > 0 - if(!dyn->insts[ninst].x64.has_next && dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) - next = dyn->insts[ninst].x64.jmp_insts; if(dyn->insts[ninst].x64.has_next && dyn->insts[next].x64.barrier) { if(dyn->insts[next].x64.barrier&BARRIER_FLOAT) { fpu_purgecache(dyn, ninst, 0, x1, x2, x3); @@ -228,7 +226,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int if(dyn->forward) { if(dyn->forward_to == addr && !need_epilog && ok>=0) { // we made it! 
- reset_n = get_first_jump(dyn, addr); + reset_n = get_first_jump_addr(dyn, addr); if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Forward extend block for %d bytes %s%p -> %p (ninst %d - %d)\n", dyn->forward_to-dyn->forward, dyn->insts[dyn->forward_ninst].x64.has_callret?"(opt. call) ":"", (void*)dyn->forward, (void*)dyn->forward_to, reset_n, ninst); if(dyn->insts[dyn->forward_ninst].x64.has_callret && !dyn->insts[dyn->forward_ninst].x64.has_next) dyn->insts[dyn->forward_ninst].x64.has_next = 1; // this block actually continue |