diff options
Diffstat (limited to 'src/dynarec/arm64/dynarec_arm64_df.c')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_df.c | 138 |
1 files changed, 99 insertions, 39 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c index c9a6a9f6..c1ae66a3 100644 --- a/src/dynarec/arm64/dynarec_arm64_df.c +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -1,7 +1,6 @@ #include <stdio.h> #include <stdlib.h> #include <stddef.h> -#include <pthread.h> #include <errno.h> #include "debug.h" @@ -178,6 +177,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #else MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit + BFCw(x5, FPSR_QC, 1); // reset QC bit MSR_fpsr(x5); if(ST_IS_F(0)) { VFCVTZSs(s0, v1); @@ -185,13 +185,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VFCVTZSd(s0, v1); SQXTN_S_D(s0, s0); } - SQXTN_H_S(s0, s0); - VST16(s0, wback, fixedaddress); + VMOVSto(x3, s0, 0); MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STH(x5, wback, fixedaddress); + TBNZ_MARK2(x5, FPSR_IOC); + SXTHw(x5, x3); // check if 16bits value is fine + SUBw_REG(x5, x5, x3); + CBZw_MARK3(x5); + MARK2; + MOV32w(x3, 0x8000); MARK3; + STH(x3, wback, fixedaddress); #endif x87_do_pop(dyn, ninst, x3); break; @@ -212,6 +215,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin #else MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit + BFCw(x5, FPSR_QC, 1); // reset QC bit MSR_fpsr(x5); if(ST_IS_F(0)) { FRINTXS(s0, v1); @@ -221,13 +225,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VFCVTZSd(s0, s0); SQXTN_S_D(s0, s0); } - SQXTN_H_S(s0, s0); - VST16(s0, wback, fixedaddress); + VMOVSto(x3, s0, 0); MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STH(x5, wback, fixedaddress); + TBNZ_MARK2(x5, FPSR_IOC); + SXTHw(x5, x3); // check if 16bits value is fine + SUBw_REG(x5, x5, x3); + CBZw_MARK3(x5); + MARK2; + MOV32w(x3, 0x8000); MARK3; + STH(x3, wback, fixedaddress); #endif x87_restoreround(dyn, ninst, u8); break; @@ -257,13 +264,16 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VFCVTZSd(s0, s0); SQXTN_S_D(s0, s0); } - SQXTN_H_S(s0, s0); - VST16(s0, wback, fixedaddress); + VMOVSto(x3, s0, 0); MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - MOV32w(x5, 0x8000); - STH(x5, wback, fixedaddress); + TBNZ_MARK2(x5, FPSR_IOC); + SXTHw(x5, x3); // check if 16bits value is fine + SUBw_REG(x5, x5, x3); + CBZw_MARK3(x5); + MARK2; + MOV32w(x3, 0x8000); MARK3; + STH(x3, wback, fixedaddress); #endif x87_do_pop(dyn, ninst, x3); x87_restoreround(dyn, ninst, u8); @@ -277,10 +287,31 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 5: INST_NAME("FILD ST0, i64"); - v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_D); + v1 = x87_do_push(dyn, ninst, x1, NEON_CACHE_ST_I64); addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); - LDx(x1, wback, fixedaddress); - SCVTFDx(v1, x1); + VLD64(v1, wback, fixedaddress); + if(!ST_IS_I64(0)) { + if(rex.is32bits) { + // need to also feed the STll stuff... + ADDx_U12(x4, xEmu, offsetof(x64emu_t, fpu_ll)); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, top)); + int a = 0 - dyn->n.x87stack; + if(a) { + if(a<0) { + SUBw_U12(x1, x1, -a); + } else { + ADDw_U12(x1, x1, a); + } + ANDw_mask(x1, x1, 0, 2); //mask=7 + } + ADDx_REG_LSL(x1, x4, x1, 4); // fpu_ll is 2 i64 + VSTR64_U12(v1, x1, 8); // ll + } + SCVTFDD(v1, v1); + if(rex.is32bits) { + VSTR64_U12(v1, x1, 0); // ref + } + } break; case 6: INST_NAME("FBSTP tbytes, ST0"); @@ -292,29 +323,58 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin break; case 7: INST_NAME("FISTP i64, ST0"); - v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_D); - u8 = x87_setround(dyn, ninst, x1, x2, x4); + v1 = x87_get_st(dyn, ninst, x1, x2, 0, NEON_CACHE_ST_I64); + if(!ST_IS_I64(0)) { + u8 = x87_setround(dyn, ninst, x1, x2, x4); + } addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<3, 7, rex, NULL, 0, 0); ed = x1; s0 = fpu_get_scratch(dyn); - #if 0 - FRINT64XD(s0, v1); - VFCVTZSd(s0, s0); - VSTR64_U12(s0, wback, fixedaddress); - #else - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - FRINTXD(s0, v1); - VFCVTZSd(s0, s0); - VST64(s0, wback, fixedaddress); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ_MARK3(x5, FPSR_IOC); - ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000 - STx(x5, wback, fixedaddress); - MARK3; - #endif - x87_restoreround(dyn, ninst, u8); + if(ST_IS_I64(0)) { + VST64(v1, wback, fixedaddress); + } else { + #if 0 + FRINT64XD(s0, v1); + VFCVTZSd(s0, s0); + VSTR64_U12(s0, wback, fixedaddress); + #else + if(rex.is32bits) { + // need to check STll first... + ADDx_U12(x5, xEmu, offsetof(x64emu_t, fpu_ll)); + LDRw_U12(x1, xEmu, offsetof(x64emu_t, top)); + VMOVQDto(x3, v1, 0); + int a = 0 - dyn->n.x87stack; + if(a) { + if(a<0) { + SUBw_U12(x1, x1, -a); + } else { + ADDw_U12(x1, x1, a); + } + ANDw_mask(x1, x1, 0, 2); //mask=7 + } + ADDx_REG_LSL(x1, x5, x1, 4); // fpu_ll is 2 i64 + LDRx_U12(x5, x1, 0); // ref + SUBx_REG(x5, x5, x3); + CBNZx_MARK2(x5); + LDRx_U12(x5, x1, 8); // ll + STx(x5, wback, fixedaddress); + B_MARK3(c__); + MARK2; + } + MRS_fpsr(x5); + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + MSR_fpsr(x5); + FRINTXD(s0, v1); + VFCVTZSd(s0, s0); + VST64(s0, wback, fixedaddress); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ_MARK3(x5, FPSR_IOC); + ORRx_mask(x5, xZR, 1, 1, 0); //0x8000000000000000 + STx(x5, wback, fixedaddress); + MARK3; + #endif + x87_restoreround(dyn, ninst, u8); + } x87_do_pop(dyn, ninst, x3); break; default: |