diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-24 16:00:17 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-24 16:00:17 +0100 |
| commit | 571de7f07f026aa955f7ffb926124e87c737aba1 (patch) | |
| tree | 2c13b37ba30f815536928e2d01acff536093d03a | |
| parent | b055c2be65ef407841f6988d44b69ffc0bfe281c (diff) | |
| download | box64-571de7f07f026aa955f7ffb926124e87c737aba1.tar.gz box64-571de7f07f026aa955f7ffb926124e87c737aba1.zip | |
[DYNAREC] Added F2 0F 2D opcodes, and use a simpler version of the conversions (but complex one is still available under a define in dynarec_arm64_helper.h)
| -rwxr-xr-x | src/dynarec/arm64_emitter.h | 12 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_660f.c | 4 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_f20f.c | 29 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_helper.h | 3 |
4 files changed, 48 insertions, 0 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h index 23916e66..9bc3190c 100755 --- a/src/dynarec/arm64_emitter.h +++ b/src/dynarec/arm64_emitter.h @@ -866,28 +866,38 @@ // Floating-point Convert to Signed integer, rounding toward Minus infinity #define FCVTMSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b100, Sn, Wd)) #define FCVTMSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b100, Sn, Xd)) +#define FCVTMSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b100, Sn, Xd)) #define FCVTMSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b100, Dn, Wd)) #define FCVTMSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 0b100, Dn, Xd)) +#define FCVTMSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b100, Dn, Xd)) // Floating-point Convert to Unsigned integer, rounding toward Minus infinity #define FCVTMUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b10, 0b101, Sn, Wd)) #define FCVTMUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b10, 0b101, Sn, Xd)) +#define FCVTMUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b10, 0b101, Sn, Xd)) #define FCVTMUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b10, 0b101, Dn, Wd)) #define FCVTMUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b10, 0b101, Dn, Xd)) +#define FCVTMUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b10, 0b101, Dn, Xd)) // Floating-point Convert to Signed integer, rounding to nearest with ties to even #define FCVTNSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b000, Sn, Wd)) #define FCVTNSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 0b000, Sn, Xd)) +#define FCVTNSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b000, Sn, Xd)) #define FCVTNSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b000, Dn, Wd)) #define FCVTNSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b000, Dn, Xd)) +#define FCVTNSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b000, Dn, Xd)) // Floating-point Convert to Unsigned integer, rounding to nearest with ties to even #define FCVTNUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b00, 0b001, Sn, Wd)) #define FCVTNUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b00, 
0b001, Sn, Xd)) +#define FCVTNUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b00, 0b001, Sn, Xd)) #define FCVTNUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b00, 0b001, Dn, Wd)) #define FCVTNUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b00, 0b001, Dn, Xd)) +#define FCVTNUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b00, 0b001, Dn, Xd)) // Floating-point Convert to Signed integer, rounding toward Plus infinity #define FCVTPSwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 0b000, Sn, Wd)) #define FCVTPSxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b000, Sn, Xd)) +#define FCVTPSxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b01, 0b000, Sn, Xd)) #define FCVTPSwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b01, 0b000, Dn, Wd)) #define FCVTPSxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b01, 0b000, Dn, Xd)) +#define FCVTPSxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b01, 0b000, Dn, Xd)) // Floating-point Convert to Unsigned integer, rounding toward Plus infinity #define FCVTPUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b01, 0b001, Sn, Wd)) #define FCVTPUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b01, 0b001, Sn, Xd)) @@ -903,8 +913,10 @@ // Floating-point Convert to Unsigned integer, rounding toward Zero #define FCVTZUwS(Wd, Sn) EMIT(FCVT_scalar(0, 0b00, 0b11, 0b001, Sn, Wd)) #define FCVTZUxS(Xd, Sn) EMIT(FCVT_scalar(1, 0b00, 0b11, 0b001, Sn, Xd)) +#define FCVTZUxwS(Xd, Sn) EMIT(FCVT_scalar(rex.w, 0b00, 0b11, 0b001, Sn, Xd)) #define FCVTZUwD(Wd, Dn) EMIT(FCVT_scalar(0, 0b01, 0b11, 0b001, Dn, Wd)) #define FCVTZUxD(Xd, Dn) EMIT(FCVT_scalar(1, 0b01, 0b11, 0b001, Dn, Xd)) +#define FCVTZUxwD(Xd, Dn) EMIT(FCVT_scalar(rex.w, 0b01, 0b11, 0b001, Dn, Xd)) #define FCVT_vector_scalar(U, o2, sz, o1, Rn, Rd) (0b01<<30 | (U)<<29 | 0b11110<<24 | (o2)<<23 | (sz)<<22 | 0b10000<<17 | 0b1110<<13 | (o1)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) // Floating-point Convert to (Un)signed integer, rounding to nearest with ties to Away diff --git a/src/dynarec/dynarec_arm64_660f.c b/src/dynarec/dynarec_arm64_660f.c index 7c23baed..b1e64b39 100755 --- 
a/src/dynarec/dynarec_arm64_660f.c +++ b/src/dynarec/dynarec_arm64_660f.c @@ -203,6 +203,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n nextop = F8; GETEX(v1, 0); GETGX_empty(v0); + #ifdef PRECISE_CVT LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); UBFXx(x1, x1, 13, 2); // extract round requested LSLx_REG(x1, x1, 3); @@ -218,6 +219,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n VFCVTPSQS(v0, v1); // 2: Toward +inf B_NEXT_nocond; VFCVTZSQS(v0, v1); // 3: Toward 0 + #else + VFCVTNSQS(v0, v1); + #endif break; case 0x60: diff --git a/src/dynarec/dynarec_arm64_f20f.c b/src/dynarec/dynarec_arm64_f20f.c index c153f907..e8200877 100755 --- a/src/dynarec/dynarec_arm64_f20f.c +++ b/src/dynarec/dynarec_arm64_f20f.c @@ -48,6 +48,10 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n int d0, d1; int fixedaddress; +#ifdef PRECISE_CVT + int j32; + MAYUSE(j32); +#endif MAYUSE(d0); MAYUSE(d1); MAYUSE(q0); @@ -107,6 +111,31 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n GETEX(q0, 0); FCVTZSxwD(gd, q0); break; + case 0x2D: + INST_NAME("CVTSD2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEX(q0, 0); + #ifdef PRECISE_CVT + LDRH_U12(x1, xEmu, offsetof(x64emu_t, mxcsr)); + UBFXx(x1, x1, 13, 2); // extract round requested + LSLx_REG(x1, x1, 3); + ADDx_U12(x1, x1, 8); // add the actual add+jump opcodes + // Construct a "switch case", with each case 2 instructions, so 8 bytes + BL(+4); // Branch with Link to next, so LR gets next PC address + ADDx_REG(xLR, xLR, x1); + B(xLR); // could use RET, but it's not really one + FCVTNSxwD(gd, q0); // 0: Nearest (even) + B_NEXT_nocond; + FCVTMSxwD(gd, q0); // 1: Toward -inf + B_NEXT_nocond; + FCVTPSxwD(gd, q0); // 2: Toward +inf + B_NEXT_nocond; + FCVTZSxwD(gd, q0); // 3: Toward 0 + #else + FCVTNSxwD(gd, q0); + #endif + break; case 0x51: diff --git a/src/dynarec/dynarec_arm64_helper.h 
b/src/dynarec/dynarec_arm64_helper.h index f61f9d23..53f8da67 100755 --- a/src/dynarec/dynarec_arm64_helper.h +++ b/src/dynarec/dynarec_arm64_helper.h @@ -1,6 +1,9 @@ #ifndef __DYNAREC_ARM64_HELPER_H__ #define __DYNAREC_ARM64_HELPER_H__ +// undef to get Close to SSE Float->int conversions +//#define PRECISE_CVT + #if STEP == 0 #include "dynarec_arm64_pass0.h" #elif STEP == 1 |