diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 13:12:06 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-02 13:12:06 +0200 |
| commit | eb209abe6237223a9af643593fcea04d223d4afe (patch) | |
| tree | 08a10d4a91fee849acb90cafc77e58c8e0938121 /src | |
| parent | 13551fd410f8484373f3ad73282828ca8713bb3f (diff) | |
| download | box64-eb209abe6237223a9af643593fcea04d223d4afe.tar.gz box64-eb209abe6237223a9af643593fcea04d223d4afe.zip | |
[ARM64_DYNAREC] Don't use a fixed scratch register for x87 conversion, as it might conflict with YMM handling
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.c | 50 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 1 |
3 files changed, 36 insertions, 17 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c index e2e0f1e0..febec38b 100644 --- a/src/dynarec/arm64/dynarec_arm64_functions.c +++ b/src/dynarec/arm64/dynarec_arm64_functions.c @@ -70,6 +70,8 @@ void fpu_free_reg(dynarec_arm_t* dyn, int reg) dyn->n.fpuused[reg] = 0; if(dyn->n.neoncache[reg].t!=NEON_CACHE_ST_F && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_D && dyn->n.neoncache[reg].t!=NEON_CACHE_ST_I64) dyn->n.neoncache[reg].v = 0; + if(dyn->n.fpu_scratch && reg==SCRATCH0+dyn->n.fpu_scratch-1) + --dyn->n.fpu_scratch; } // Get an MMX double reg int fpu_get_reg_emm(dynarec_arm_t* dyn, int ninst, int emm) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 20304660..6da4607d 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1119,12 +1119,20 @@ void x87_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int VSTR64_REG_LSL3(dyn->n.x87reg[i], s1, s3); // save the value break; case NEON_CACHE_ST_F: - FCVT_D_S(SCRATCH, dyn->n.x87reg[i]); - VSTR64_REG_LSL3(SCRATCH, s1, s3); // save the value + { + int scratch = fpu_get_scratch(dyn, ninst); + FCVT_D_S(scratch, dyn->n.x87reg[i]); + VSTR64_REG_LSL3(scratch, s1, s3); // save the value + fpu_free_reg(dyn, scratch); + } break; case NEON_CACHE_ST_I64: - SCVTFDD(SCRATCH, dyn->n.x87reg[i]); - VSTR64_REG_LSL3(SCRATCH, s1, s3); // save the value + { + int scratch = fpu_get_scratch(dyn, ninst); + SCVTFDD(scratch, dyn->n.x87reg[i]); + VSTR64_REG_LSL3(scratch, s1, s3); // save the value + fpu_free_reg(dyn, scratch); + } break; } if(!next) { @@ -1342,11 +1350,15 @@ void x87_forget(dynarec_arm_t* dyn, int ninst, int s1, int s2, int st) ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 } if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) { - FCVT_D_S(SCRATCH, reg); - VSTR64_REG_LSL3(SCRATCH, s1, s2); + int scratch = fpu_get_scratch(dyn, ninst); + FCVT_D_S(scratch, reg); 
+ VSTR64_REG_LSL3(scratch, s1, s2); + fpu_free_reg(dyn, scratch); } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) { - SCVTFDD(SCRATCH, reg); - VSTR64_REG_LSL3(SCRATCH, s1, s2); + int scratch = fpu_get_scratch(dyn, ninst); + SCVTFDD(scratch, reg); + VSTR64_REG_LSL3(scratch, s1, s2); + fpu_free_reg(dyn, scratch); } else { VSTR64_REG_LSL3(reg, s1, s2); } @@ -1436,11 +1448,15 @@ void x87_free(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int st) ANDw_mask(s2, s2, 0, 2); //mask=7 // (emu->top + i)&7 } if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_F) { - FCVT_D_S(SCRATCH, reg); - VSTR64_REG_LSL3(SCRATCH, s1, s2); + int scratch = fpu_get_scratch(dyn, ninst); + FCVT_D_S(scratch, reg); + VSTR64_REG_LSL3(scratch, s1, s2); + fpu_free_reg(dyn, scratch); } else if(dyn->n.neoncache[reg].t==NEON_CACHE_ST_I64) { - SCVTFDD(SCRATCH, reg); - VSTR64_REG_LSL3(SCRATCH, s1, s2); + int scratch = fpu_get_scratch(dyn, ninst); + SCVTFDD(scratch, reg); + VSTR64_REG_LSL3(scratch, s1, s2); + fpu_free_reg(dyn, scratch); } else { VSTR64_REG_LSL3(reg, s1, s2); } @@ -1965,15 +1981,17 @@ static void swapCache(dynarec_arm_t* dyn, int ninst, int i, int j, neoncache_t * MESSAGE(LOG_DUMP, "\t - Swapping %d <-> %d\n", i, j); // There is no VSWP in Arm64 NEON to swap 2 register contents! // so use a scratch... 
+ int scratch = fpu_get_scratch(dyn, ninst); if(quad) { - VMOVQ(SCRATCH, i); + VMOVQ(scratch, i); VMOVQ(i, j); - VMOVQ(j, SCRATCH); + VMOVQ(j, scratch); } else { - VMOV(SCRATCH, i); + VMOV(scratch, i); VMOV(i, j); - VMOV(j, SCRATCH); + VMOV(j, scratch); } + fpu_free_reg(dyn, scratch); tmp.v = cache->neoncache[i].v; cache->neoncache[i].v = cache->neoncache[j].v; cache->neoncache[j].v = tmp.v; diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 0252a052..7471ba55 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -1500,7 +1500,6 @@ void arm64_move64(dynarec_arm_t* dyn, int ninst, int reg, uint64_t val); #define neoncache_st_coherency STEPNAME(neoncache_st_coherency) int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b); // scratch fpu regs for convertions -#define SCRATCH 31 #if STEP == 0 #define ST_IS_F(A) 0 |