diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2023-04-01 13:17:09 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2023-04-01 13:18:34 +0200 |
| commit | 5661de58a8373801b0ecbd2dab016678cbb2a964 (patch) | |
| tree | 4f69500c2830a7a9515aa4012c4979ec76f01a8e /src | |
| parent | a1d5cb0e6961c68499113345c02bcff42a0e9fab (diff) | |
| download | box64-5661de58a8373801b0ecbd2dab016678cbb2a964.tar.gz box64-5661de58a8373801b0ecbd2dab016678cbb2a964.zip | |
[ARM64_DYNAREC] Fixed and improved 66 0F 5A/5B opcodes
Diffstat (limited to 'src')
| -rwxr-xr-x | src/dynarec/arm64/arm64_emitter.h | 6 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_660f.c | 37 |
2 files changed, 38 insertions, 5 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 7d6b76cd..c84e4f86 100755
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1266,6 +1266,12 @@
 #define FCVT_D_S(Dd, Sn) EMIT(FCVT_precision(0b00, 0b01, Sn, Dd))
 #define FCVT_S_D(Sd, Dn) EMIT(FCVT_precision(0b01, 0b00, Dn, Sd))
 
+#define FCVTN_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+// Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half, use FPCR rounding
+#define FCVTN(Vd, Vn) EMIT(FCVTN_vector(0, 1, Vn, Vd))
+// Convert Vn from 2*Double to higher Vd as 2*float, use FPCR rounding
+#define FCVTN2(Vd, Vn) EMIT(FCVTN_vector(1, 1, Vn, Vd))
+
 #define FCVTXN_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half
 #define FCVTXN(Vd, Vn) EMIT(FCVTXN_vector(0, 1, Vn, Vd))
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index eea7519c..5b21e174 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -1058,17 +1058,44 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             nextop = F8;
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
-            FCVTXN(v0, v1);
+            if(box64_dynarec_fastround) {
+                FCVTXN(v0, v1);
+            } else {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                FCVTN(v0, v1);
+                x87_restoreround(dyn, ninst, u8);
+            }
             break;
         case 0x5B:
             INST_NAME("CVTPS2DQ Gx, Ex");
             nextop = F8;
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
-            u8 = sse_setround(dyn, ninst, x1, x2, x3);
-            VFRINTISQ(v0, v1);
-            x87_restoreround(dyn, ninst, u8);
-            VFCVTZSQS(v0, v0);
+            if(box64_dynarec_fastround) {
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                VFRINTISQ(q0, v1);
+                x87_restoreround(dyn, ninst, u8);
+                VFCVTZSQS(q0, q0);
+            } else {
+                MRS_fpsr(x5);
+                BFCw(x5, FPSR_IOC, 1); // reset IOC bit
+                MSR_fpsr(x5);
+                u8 = sse_setround(dyn, ninst, x1, x2, x3);
+                MOV32w(x4, 0x80000000);
+                d0 = fpu_get_scratch(dyn);
+                for(int i=0; i<4; ++i) {
+                    BFCw(x5, FPSR_IOC, 1); // reset IOC bit
+                    MSR_fpsr(x5);
+                    VMOVeS(d0, 0, v1, i);
+                    FRINTIS(d0, d0);
+                    VFCVTZSs(d0, d0);
+                    MRS_fpsr(x5); // get back FPSR to check the IOC bit
+                    TBZ(x5, FPSR_IOC, 4+4);
+                    VMOVQSfrom(d0, 0, x4);
+                    VMOVeS(v0, i, d0, 0);
+                }
+                x87_restoreround(dyn, ninst, u8);
+            }
             break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");