diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2024-01-05 14:22:13 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-01-05 14:22:13 +0100 |
| commit | 077ba65f7e1ada261999197ea623488675df8cfa (patch) | |
| tree | 752e53fa514e923d59a147a35f87943d722d6a06 | |
| parent | 6f6a42642418ea34a77a0648a75957a28b733a1e (diff) | |
| download | box64-077ba65f7e1ada261999197ea623488675df8cfa.tar.gz box64-077ba65f7e1ada261999197ea623488675df8cfa.zip | |
[ARM64_DYNAREC] More FRINTTS use
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 18 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 74 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_df.c | 2 |
3 files changed, 61 insertions, 33 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index cbfa8371..51dc499a 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -2171,6 +2171,24 @@ #define FRINT64XS(Sd, Sn) EMIT(FRINTxx_scalar(0b00, 0b11, Sn, Sd)) #define FRINT64XD(Dd, Dn) EMIT(FRINTxx_scalar(0b01, 0b11, Dn, Dd)) +#define FRINTxx_vector(Q, U, sz, op, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b1111<<13 | (op)<<12 | 0b10<<10 | (Rn)<<5 | (Rd)) +#define VFRINT32ZS(Vd, Vn) EMIT(FRINTxx_vector(0, 0, 0, 0, Vn, Vd)) +#define VFRINT32ZSQ(Vd, Vn) EMIT(FRINTxx_vector(1, 0, 0, 0, Vn, Vd)) +#define VFRINT32XS(Vd, Vn) EMIT(FRINTxx_vector(0, 1, 0, 0, Vn, Vd)) +#define VFRINT32XSQ(Vd, Vn) EMIT(FRINTxx_vector(1, 1, 0, 0, Vn, Vd)) +#define VFRINT32ZD(Vd, Vn) EMIT(FRINTxx_vector(0, 0, 1, 0, Vn, Vd)) +#define VFRINT32ZDQ(Vd, Vn) EMIT(FRINTxx_vector(1, 0, 1, 0, Vn, Vd)) +#define VFRINT32XD(Vd, Vn) EMIT(FRINTxx_vector(0, 1, 1, 0, Vn, Vd)) +#define VFRINT32XDQ(Vd, Vn) EMIT(FRINTxx_vector(1, 1, 1, 0, Vn, Vd)) +#define VFRINT64ZS(Vd, Vn) EMIT(FRINTxx_vector(0, 0, 0, 1, Vn, Vd)) +#define VFRINT64ZSQ(Vd, Vn) EMIT(FRINTxx_vector(1, 0, 0, 1, Vn, Vd)) +#define VFRINT64XS(Vd, Vn) EMIT(FRINTxx_vector(0, 1, 0, 1, Vn, Vd)) +#define VFRINT64XSQ(Vd, Vn) EMIT(FRINTxx_vector(1, 1, 0, 1, Vn, Vd)) +#define VFRINT64ZD(Vd, Vn) EMIT(FRINTxx_vector(0, 0, 1, 1, Vn, Vd)) +#define VFRINT64ZDQ(Vd, Vn) EMIT(FRINTxx_vector(1, 0, 1, 1, Vn, Vd)) +#define VFRINT64XD(Vd, Vn) EMIT(FRINTxx_vector(0, 1, 1, 1, Vn, Vd)) +#define VFRINT64XDQ(Vd, Vn) EMIT(FRINTxx_vector(1, 1, 1, 1, Vn, Vd)) + // CRC32 extension #define CRC32C_gen(sf, Rm, sz, Rn, Rd) ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b010<<13 | 1<<12 | (sz)<<10 | (Rn)<<5 | (Rd)) #define CRC32CB(Wd, Wn, Wm) EMIT(CRC32C_gen(0, Wm, 0b00, Wn, Wd)) diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index 3ef67695..8a9aeb6c 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -336,24 +336,29 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if (box64_dynarec_fastround) { VFCVTZSS(q0, v1); } else { - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - ORRw_mask(x2, xZR, 1, 0); //0x80000000 - d0 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { + if(arm64_frintts) { + VFRINT32ZS(q0, v1); + VFCVTZSS(q0, q0); + } else { + MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit - if (i) { - VMOVeS(d0, 0, v1, i); - FRINTZS(d0, d0); - } else { - FRINTZS(d0, v1); + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeS(d0, 0, v1, i); + FRINTZS(d0, d0); + } else { + FRINTZS(d0, v1); + } + FCVTZSwS(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); } - FCVTZSwS(x1, d0); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ(x5, FPSR_IOC, 4+4); - MOVw_REG(x1, x2); - VMOVQSfrom(q0, i, x1); } } break; @@ -369,24 +374,29 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin VFCVTZSS(q0, q0); } else { u8 = sse_setround(dyn, ninst, x1, x2, x3); - MRS_fpsr(x5); - BFCw(x5, FPSR_IOC, 1); // reset IOC bit - MSR_fpsr(x5); - ORRw_mask(x2, xZR, 1, 0); //0x80000000 - d0 = fpu_get_scratch(dyn); - for (int i=0; i<2; ++i) { + if(arm64_frintts) { + VFRINT32XS(q0, v1); + VFCVTZSS(q0, q0); + } else { + MRS_fpsr(x5); BFCw(x5, FPSR_IOC, 1); // reset IOC bit - if (i) { - VMOVeS(d0, 0, v1, i); - FRINTIS(d0, d0); - } else { - FRINTIS(d0, v1); + MSR_fpsr(x5); + ORRw_mask(x2, xZR, 1, 0); //0x80000000 + d0 = fpu_get_scratch(dyn); + for (int i=0; i<2; ++i) { + BFCw(x5, FPSR_IOC, 1); // reset IOC bit + if (i) { + VMOVeS(d0, 0, v1, i); + FRINTIS(d0, d0); + } else { + FRINTIS(d0, v1); + } + FCVTZSwS(x1, d0); + MRS_fpsr(x5); // get back FPSR to check the IOC bit + TBZ(x5, FPSR_IOC, 4+4); + MOVw_REG(x1, x2); + VMOVQSfrom(q0, i, x1); } - FCVTZSwS(x1, d0); - MRS_fpsr(x5); // get back FPSR to check the IOC bit - TBZ(x5, FPSR_IOC, 4+4); - MOVw_REG(x1, x2); - VMOVQSfrom(q0, i, x1); } x87_restoreround(dyn, ninst, u8); } diff --git a/src/dynarec/arm64/dynarec_arm64_df.c b/src/dynarec/arm64/dynarec_arm64_df.c index b81c4128..5853fd39 100644 --- a/src/dynarec/arm64/dynarec_arm64_df.c +++ b/src/dynarec/arm64/dynarec_arm64_df.c @@ -176,7 +176,7 @@ uintptr_t dynarec64_DF(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = x1; s0 = fpu_get_scratch(dyn); #if 0 - // this version needs ARM v8.5, //TODO: add detection of this extension to use it + // this version needs ARM v8.5, and doesn't handle saturation for 32bits integer not fitting 16bits FRINT32ZD(s0, v1); // no saturation instruction on Arm, so using NEON VFCVTZSd(s0, s0); |