| author | ptitSeb <sebastien.chev@gmail.com> | 2024-06-06 16:48:05 +0200 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2024-06-06 16:48:05 +0200 |
| commit | 0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720 (patch) | |
| tree | 2ab46d1455b5cd96049b2f74a1f058842d2115d6 | /src/dynarec |
| parent | df47fb5be83f3226b11703f23722c7c0b6a0b271 (diff) | |
| download | box64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.tar.gz box64-0dc5761ca6743d5a5f0f6d3086828f6d6ed5c720.zip | |
[INTERPRETER] Added support for F16C extension (linked to AVX flag) ([ARM64_DYNAREC] too)
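F16C itself is a small extension: it only adds VCVTPH2PS and VCVTPS2PH, which convert packed values between half precision (binary16) and single precision (binary32). As a purely illustrative guest-side example (not code from this repository), this is what a compiler emits the VEX-encoded forms for, e.g. built with `gcc -mavx -mf16c`:

```c
#include <immintrin.h>
#include <stdio.h>

int main(void)
{
    float in[8] = { 1.0f, 0.5f, -2.25f, 3.14159f, 65504.0f, 1e-8f, 0.0f, -0.0f };
    float out[8];

    /* VCVTPS2PH: 8 floats -> 8 half-precision values, rounding taken from MXCSR */
    __m128i half = _mm256_cvtps_ph(_mm256_loadu_ps(in), _MM_FROUND_CUR_DIRECTION);

    /* VCVTPH2PS: 8 half-precision values back to 8 floats (exact) */
    _mm256_storeu_ps(out, _mm256_cvtph_ps(half));

    for (int i = 0; i < 8; ++i)
        printf("%g -> %g\n", in[i], out[i]);
    return 0;
}
```

Since both instructions are VEX-encoded, the feature only makes sense alongside AVX, hence the commit tying it to the AVX flag.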
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 12 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 17 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 48 |
3 files changed, 77 insertions, 0 deletions
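The widening direction (VCVTPH2PS) is exact and needs no rounding, while the narrowing direction (VCVTPS2PH) must honour a rounding mode, which is why only the VCVTPS2PH path in the diff below manipulates FPCR. As a minimal sketch of the per-lane widening semantics the interpreter side has to implement, assuming nothing about box64's actual helpers (the function name is hypothetical):

```c
#include <stdint.h>

/* Widen an IEEE 754 binary16 to binary32, returned as raw bits. */
uint32_t half_to_float_bits(uint16_t h)
{
    uint32_t sign = (uint32_t)(h & 0x8000) << 16;
    int32_t  exp  = (h >> 10) & 0x1F;
    uint32_t mant = h & 0x3FF;

    if (exp == 0x1F)                        /* Inf or NaN: widen exponent, keep payload */
        return sign | 0x7F800000u | (mant << 13);

    if (exp == 0) {
        if (mant == 0)                      /* signed zero */
            return sign;
        exp = 1;                            /* subnormal: renormalize the mantissa */
        while (!(mant & 0x400)) { mant <<= 1; exp--; }
        mant &= 0x3FF;
    }
    /* rebias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits) */
    return sign | ((uint32_t)(exp + 112) << 23) | (mant << 13);
}
```

On AArch64 none of this per-lane work is needed: FCVTL(2)/FCVTN(2) with the size bit cleared already convert between float16 and float vectors, which is exactly what the new FCVTL16/FCVTN16 emitter macros encode.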
```diff
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index c7bb614d..844d29dd 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1375,18 +1375,30 @@ int convert_bitmask(uint64_t bitmask);
 #define FCVTN(Vd, Vn) EMIT(FCVTN_vector(0, 1, Vn, Vd))
 // Convert Vn from 2*Double to higher Vd as 2*float, use FPCR rounding
 #define FCVTN2(Vd, Vn) EMIT(FCVTN_vector(1, 1, Vn, Vd))
+// Convert Vn from 2*Float to lower Vd as 2*float16 and clears the upper half, use FPCR rounding
+#define FCVTN16(Vd, Vn) EMIT(FCVTN_vector(0, 0, Vn, Vd))
+// Convert Vn from 2*Float to higher Vd as 2*float16, use FPCR rounding
+#define FCVTN162(Vd, Vn) EMIT(FCVTN_vector(1, 0, Vn, Vd))
 
 #define FCVTXN_vector(Q, sz, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10110<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert Vn from 2*Double to lower Vd as 2*float and clears the upper half
 #define FCVTXN(Vd, Vn) EMIT(FCVTXN_vector(0, 1, Vn, Vd))
 // Convert Vn from 2*Double to higher Vd as 2*float
 #define FCVTXN2(Vd, Vn) EMIT(FCVTXN_vector(1, 1, Vn, Vd))
+// Convert Vn from 2*Float to lower Vd as 2*float16 and clears the upper half
+#define FCVTXN16(Vd, Vn) EMIT(FCVTXN_vector(0, 0, Vn, Vd))
+// Convert Vn from 2*Float to higher Vd as 2*float16
+#define FCVTXN162(Vd, Vn) EMIT(FCVTXN_vector(1, 0, Vn, Vd))
 
 #define FCVTL_vector(Q, sz, Rn, Rd) ((Q)<<30 | 0<<29 | 0b01110<<24 | (sz)<<22 | 0b10000<<17 | 0b10111<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
 // Convert lower Vn from 2*float to Vd as 2*double
 #define FCVTL(Vd, Vn) EMIT(FCVTL_vector(0, 1, Vn, Vd))
 // Convert higher Vn from 2*float to Vd as 2*double
 #define FCVTL2(Vd, Vn) EMIT(FCVTL_vector(1, 1, Vn, Vd))
+// Convert lower Vn from 2*float16 to Vd as 2*float
+#define FCVTL16(Vd, Vn) EMIT(FCVTL_vector(0, 0, Vn, Vd))
+// Convert higher Vn from 2*float16 to Vd as 2*float
+#define FCVTL162(Vd, Vn) EMIT(FCVTL_vector(1, 0, Vn, Vd))
 
 #define SCVTF_scalar(sf, type, rmode, opcode, Rn, Rd) ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))
 #define SCVTFSw(Sd, Wn) EMIT(SCVTF_scalar(0, 0b00, 0b00, 0b010, Wn, Sd))
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index a2a45435..e4ebd5b2 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -158,6 +158,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0x13:
+            INST_NAME("VCVTPH2PS Gx, Ex");
+            nextop = F8;
+            GETEX_Y(v1, 0, 0);
+            GETGX_empty(v0);
+            if(vex.l && v0==v1) {
+                q1 = fpu_get_scratch(dyn, ninst);
+                VMOVQ(q1, v1);
+                v1 = q1;
+            }
+            FCVTL16(v0, v1);
+            if(vex.l) {
+                GETGY_empty(v0, -1, -1, -1);
+                FCVTL162(v0, v1);
+            } else YMM0(gd);
+            break;
+
         case 0x17:
             INST_NAME("VPTEST GX, EX");
             SETFLAGS(X_ALL, SF_SET);
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
index cdbe93f6..e667f562 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
@@ -402,6 +402,54 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             F8; // read u8, but it's been already handled
             break;
 
+        case 0x1D:
+            INST_NAME("VCVTPS2PH Ex, Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if(MODREG) {
+                v1 = sse_get_reg_empty(dyn, ninst, x3, (nextop&7)+(rex.b<<3));
+            } else {
+                WILLWRITE2();
+                v1 = fpu_get_scratch(dyn, ninst);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, &unscaled, 0xfff<<(3+vex.l), vex.l?15:7, rex, NULL, 0, 1);
+            }
+            if(vex.l && v0==v1) {
+                q0 = fpu_get_scratch(dyn, ninst);
+                VMOVQ(q0, v0);
+                v0 = q0;
+            }
+            u8 = F8;
+            if(u8&4) {
+                s0 = sse_setround(dyn, ninst, x1, x2, x6);
+            } else {
+                u8&=3;
+                if(u8==1) u8=2;
+                else if(u8==2) u8=1;
+                MRS_fpcr(x1);   // get fpscr
+                MOV32w(x2, u8);
+                MOVx_REG(x6, x1);
+                BFIx(x1, x2, 22, 2);    // inject new round
+                MSR_fpcr(x1);   // put new fpscr
+                s0 = x6;
+            }
+            FCVTN16(v1, v0);
+            if(vex.l) {
+                GETGY(v0, 0, MODREG?((nextop&7)+(rex.b<<3)):-1, -1,-1);
+                FCVTN162(v1, v0);
+            }
+            x87_restoreround(dyn, ninst, s0);
+            if(MODREG) {
+                YMM0((nextop&7)+(rex.b<<3));
+            } else {
+                if(vex.l) {
+                    VST128(v1, ed, fixedaddress);
+                } else {
+                    VST64(v1, ed, fixedaddress);
+                }
+                SMWRITE2();
+            }
+            break;
+
         case 0x20:
             INST_NAME("VINSERTD Gx, Vx, Ex, Ib");
             nextop = F8;
```
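A note on the rounding handling in the VCVTPS2PH case above: imm8 bit 2 set means "use the MXCSR rounding mode" (the `sse_setround` path), otherwise imm8[1:0] carries an explicit mode. The swap of 1 and 2 before `BFIx(x1, x2, 22, 2)` is needed because x86 and AArch64 encode the directed roundings differently: x86 uses 0 = nearest, 1 = toward -inf, 2 = toward +inf, 3 = toward zero, while FPCR.RMode (bits 23:22) uses 0 = RN, 1 = RP (toward +inf), 2 = RM (toward -inf), 3 = RZ. A small sketch of that mapping, with helper names that are mine rather than box64's:

```c
#include <stdint.h>

/* Map an x86 rounding-control value (imm8[1:0] or MXCSR.RC) to FPCR.RMode. */
uint32_t x86_rc_to_fpcr_rmode(uint32_t rc)
{
    rc &= 3;
    if (rc == 1) return 2;   /* round toward -inf -> RM */
    if (rc == 2) return 1;   /* round toward +inf -> RP */
    return rc;               /* nearest (0) and toward zero (3) match */
}

/* Inject it into an FPCR image, mirroring what the emitted BFIx does. */
uint64_t fpcr_with_rounding(uint64_t fpcr, uint32_t rc)
{
    return (fpcr & ~(3ull << 22)) | ((uint64_t)x86_rc_to_fpcr_rmode(rc) << 22);
}
```

The original FPCR is kept in x6 and restored afterwards through `x87_restoreround`, so the rounding change stays local to this one instruction.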