diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-10-27 17:38:16 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-10-27 17:38:16 +0200 |
| commit | 15860f324532247345e1f314eb8ebbfe37c5d531 (patch) | |
| tree | 507acb224ac3cd61bfb30a1f51707b6347a8562d /src | |
| parent | 8a1e4cdf306ff3f57a8603004e068549248db29b (diff) | |
| download | box64-15860f324532247345e1f314eb8ebbfe37c5d531.tar.gz box64-15860f324532247345e1f314eb8ebbfe37c5d531.zip | |
[ARM64_DYNAREC] Fixed and improved i64 x87 optimisation
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_dd.c | 44 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.c | 7 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_functions.h | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 8 |
4 files changed, 38 insertions, 24 deletions
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_dd.c b/src/dynarec/arm64/dynarec_arm64_dd.c
index c56258d1..3c6b6965 100644
--- a/src/dynarec/arm64/dynarec_arm64_dd.c
+++ b/src/dynarec/arm64/dynarec_arm64_dd.c
@@ -166,27 +166,31 @@ uintptr_t dynarec64_DD(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 break;
             case 1:
                 INST_NAME("FISTTP i64, ST0");
-                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D);
+                v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0);
-                s0 = fpu_get_scratch(dyn);
-                #if 0
-                // those are ARM 8.5 opcode!
-                FRINT64ZD(s0, v1);
-                FCVTZSxD(x2, s0);
-                STRx_U12(x2, ed, fixedaddress);
-                #else
-                MRS_fpsr(x5);
-                BFCw(x5, FPSR_IOC, 1); // reset IOC bit
-                MSR_fpsr(x5);
-                FRINTRRD(s0, v1, 3);
-                FCVTZSxD(x2, s0);
-                STx(x2, ed, fixedaddress);
-                MRS_fpsr(x5); // get back FPSR to check the IOC bit
-                TBZ_MARK3(x5, FPSR_IOC);
-                ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000
-                STx(x5, ed, fixedaddress);
-                MARK3;
-                #endif
+                if(ST_IS_I64(0)) {
+                    VST64(v1, ed, fixedaddress);
+                } else {
+                    s0 = fpu_get_scratch(dyn);
+                    #if 0
+                    // those are ARM 8.5 opcode!
+                    FRINT64ZD(s0, v1);
+                    FCVTZSxD(x2, s0);
+                    STRx_U12(x2, ed, fixedaddress);
+                    #else
+                    MRS_fpsr(x5);
+                    BFCw(x5, FPSR_IOC, 1); // reset IOC bit
+                    MSR_fpsr(x5);
+                    FRINTRRD(s0, v1, 3);
+                    FCVTZSxD(x2, s0);
+                    STx(x2, ed, fixedaddress);
+                    MRS_fpsr(x5); // get back FPSR to check the IOC bit
+                    TBZ_MARK3(x5, FPSR_IOC);
+                    ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000
+                    STx(x5, ed, fixedaddress);
+                    MARK3;
+                    #endif
+                }
                 X87_POP_OR_FAIL(dyn, ninst, x3);
                 break;
             case 2:
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index f95fd7a9..28e1fc5b 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -95,6 +95,13 @@ void fpu_reset_reg(dynarec_arm_t* dyn)
 
 }
 
+int neoncache_no_i64(int a)
+{
+    if(a==NEON_CACHE_ST_I64)
+        return NEON_CACHE_ST_D;
+    return a;
+}
+
 int neoncache_get_st(dynarec_arm_t* dyn, int ninst, int a)
 {
     if (dyn->insts[ninst].n.swapped) {
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index 77982715..0111c8a2 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -37,6 +37,9 @@ int neoncache_get_current_st_f_i64(dynarec_arm_t* dyn, int a);
 void neoncache_promote_double(dynarec_arm_t* dyn, int ninst, int a);
 // Combine and propagate if needed (pass 1 only)
 int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b); // with stack current dyn->n_stack*
+// Do not allow i64 type
+int neoncache_no_i64(int a);
+
 // FPU Cache transformation (for loops)
 // Specific, need to be written by backend
 int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index b3f02ee5..85480f27 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -1203,8 +1203,8 @@ int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
 #define ST_IS_F(A) (neoncache_get_current_st(dyn, ninst, A)==NEON_CACHE_ST_F)
 #define ST_IS_I64(A) (neoncache_get_current_st(dyn, ninst, A)==NEON_CACHE_ST_I64)
 #define X87_COMBINE(A, B) neoncache_combine_st(dyn, ninst, A, B)
-#define X87_ST0 neoncache_get_current_st(dyn, ninst, 0)
-#define X87_ST(A) neoncache_get_current_st(dyn, ninst, A)
+#define X87_ST0 neoncache_no_i64(neoncache_get_current_st(dyn, ninst, 0))
+#define X87_ST(A) neoncache_no_i64(neoncache_get_current_st(dyn, ninst, A))
 #else
 #define ST_IS_F(A) (neoncache_get_st(dyn, ninst, A)==NEON_CACHE_ST_F)
 #define ST_IS_I64(A) (neoncache_get_st(dyn, ninst, A)==NEON_CACHE_ST_I64)
@@ -1213,8 +1213,8 @@ int neoncache_st_coherency(dynarec_arm_t* dyn, int ninst, int a, int b);
 #else
 #define X87_COMBINE(A, B) neoncache_get_st(dyn, ninst, A)
 #endif
-#define X87_ST0 neoncache_get_st(dyn, ninst, 0)
-#define X87_ST(A) neoncache_get_st(dyn, ninst, A)
+#define X87_ST0 neoncache_no_i64(neoncache_get_st(dyn, ninst, 0))
+#define X87_ST(A) neoncache_no_i64(neoncache_get_st(dyn, ninst, A))
 #endif
 
 //MMX helpers
```