From b055c2be65ef407841f6988d44b69ffc0bfe281c Mon Sep 17 00:00:00 2001 From: ptitSeb Date: Wed, 24 Mar 2021 15:33:52 +0100 Subject: [DYNAREC] Refined 66 0F 5B with a switch for the rounding mode --- src/dynarec/arm64_emitter.h | 7 ++++++- src/dynarec/dynarec_arm64_660f.c | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 86fe165f..23916e66 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -60,6 +60,8 @@ #define w7 x7 // emu is r0 #define xEmu 0 +// ARM64 LR (link register) is r30 +#define xLR 30 // ARM64 SP is r31 but is a special register #define xSP 31 // xZR regs is 31 @@ -340,6 +342,9 @@ #define B_gen(imm26) (0b000101<<26 | (imm26)) #define B(imm26) EMIT(B_gen(((imm26)>>2)&0x3ffffff)) +#define BL_gen(imm26) (0b100101<<26 | (imm26)) +#define BL(imm26) EMIT(BL_gen(((imm26)>>2)&0x3ffffff)) + #define NOP EMIT(0b11010101000000110010000000011111) #define CSINC_gen(sf, Rm, cond, Rn, Rd) ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd)) @@ -930,7 +935,7 @@ #define VFCVTZUs(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 0, 1, Vn, Vd)) #define VFCVTZUd(Vd, Vn) EMIT(FCVT2_vector_scalar(1, 1, 1, 1, Vn, Vd)) -#define FCVT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22) | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define FCVT_vector(Q, U, o2, sz, o1, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) // Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away #define VFCVTASS(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 0, 0, Vn, Vd)) #define VFCVTASD(Vd, Vn) EMIT(FCVT_vector(0, 0, 0, 1, 0, Vn, Vd)) diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c index d9fd72bc..7c23baed 100755 --- a/src/dynarec/dynarec_arm64_660f.c +++ 
b/src/dynarec/dynarec_arm64_660f.c @@ -203,8 +203,21 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETEX(v1, 0); GETGX_empty(v0); - // need rounding? using "toward 0 for now" - VFCVTZSQS(v0, v1); + LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); + UBFXx(x1, x1, 13, 2); // extract round requested (2-bit field at bit 13 of mxcsr) + LSLx_REG(x1, x1, 3); // scale to 8 bytes per case; NOTE(review): the _REG suffix suggests the last operand is a register index (x3), not the immediate 3 - confirm + ADDx_U12(x1, x1, 8); // add the actual add+jump opcodes (LR points at the ADD, the table starts 2 instructions later) + // Construct a "switch case", with each case 2 instructions, so 8 bytes + BL(+4); // Branch with Link to next, so LR gets next PC address + ADDx_REG(xLR, xLR, x1); + BR(xLR); // register-indirect jump into the table; B() encodes an immediate, so B(xLR) was a fixed +28 bytes branch that always hit case 3. NOTE(review): assumes the emitter provides BR(Rn) - confirm + VFCVTNSQS(v0, v1); // 0: Nearest (even) + B_NEXT_nocond; + VFCVTMSQS(v0, v1); // 1: Toward -inf + B_NEXT_nocond; + VFCVTPSQS(v0, v1); // 2: Toward +inf + B_NEXT_nocond; + VFCVTZSQS(v0, v1); // 3: Toward 0 break; case 0x60: -- cgit 1.4.1