From 9a30b65180384554df36fc5644e43d418368c3ee Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 3 Jun 2024 17:33:55 +0200
Subject: [ARM64_DYNAREC] Added AVX.66.0F38 17/20-25/29/37-40/45-47/78-79/99/AA/AE-AF/BB-BD/F7 opcodes

---
 src/dynarec/arm64/arm64_emitter.h             |  26 +-
 src/dynarec/arm64/arm64_printer.c             |   8 +
 src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 471 +++++++++++++++++++++++++-
 3 files changed, 503 insertions(+), 2 deletions(-)

diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 4817e130..b8c71b6b 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -1156,6 +1156,13 @@ int convert_bitmask(uint64_t bitmask);
 #define FABSS(Sd, Sn) EMIT(FNEGABS_scalar(0b00, 0b01, Sn, Sd))
 #define FABSD(Dd, Dn) EMIT(FNEGABS_scalar(0b01, 0b01, Dn, Dd))
 
+#define FNEGABS_vector(Q, U, sz, Rn, Rd) ((Q)<<30 | (U)<<29 | 0b01110<<24 | 1<<23 | (sz)<<22 | 0b10000<<17 | 0b01111<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
+#define VFNEGS(Vd, Vn) EMIT(FNEGABS_vector(0, 1, 0, Vn, Vd))
+#define VFNEGQS(Vd, Vn) EMIT(FNEGABS_vector(1, 1, 0, Vn, Vd))
+#define VFNEGQD(Vd, Vn) EMIT(FNEGABS_vector(1, 1, 1, Vn, Vd))
+#define VFABSS(Vd, Vn) EMIT(FNEGABS_vector(0, 0, 0, Vn, Vd))
+#define VFABSQS(Vd, Vn) EMIT(FNEGABS_vector(1, 0, 0, Vn, Vd))
+#define VFABSQD(Vd, Vn) EMIT(FNEGABS_vector(1, 0, 1, Vn, Vd))
 
 // MUL
 #define FMUL_vector(Q, sz, Rm, Rn, Rd) ((Q)<<30 | 1<<29 | 0b01110<<24 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11011<<11 | 1<<10 | (Rn)<<5 | (Rd))
@@ -1452,11 +1459,23 @@ int convert_bitmask(uint64_t bitmask);
 #define FMAXNMD(Dd, Dn, Dm) EMIT(FMINMAX_scalar(0b01, Dm, 0b10, Dn, Dd))
 
 // Fused Add Multiply
-#define FMADD_gen(type, o1, Rm, o0, Ra, Rn, Rd) (0b11111<<24 | (type)<<22 | (o1)<<21 | (Rm)<<16 | (o0)<<0 | (Ra)<<10 | (Rn)<<5 | (Rd))
+#define FMADD_gen(type, o1, Rm, o0, Ra, Rn, Rd) (0b11111<<24 | (type)<<22 | (o1)<<21 | (Rm)<<16 | (o0)<<15 | (Ra)<<10 | (Rn)<<5 | (Rd))
 // scalar Rd = Ra + Rn*Rm
 #define FMADD_32(Sd, Sa, Sn, Sm) EMIT(FMADD_gen(0b00, 0, Sm, 0, Sa, Sn, Sd))
 // scalar Rd = Ra + Rn*Rm
 #define FMADD_64(Dd, Da, Dn, Dm) EMIT(FMADD_gen(0b01, 0, Dm, 0, Da, Dn, Dd))
+// scalar Rd = -Ra - Rn*Rm
+#define FNMADD_32(Sd, Sa, Sn, Sm) EMIT(FMADD_gen(0b00, 1, Sm, 0, Sa, Sn, Sd))
+// scalar Rd = -Ra - Rn*Rm
+#define FNMADD_64(Dd, Da, Dn, Dm) EMIT(FMADD_gen(0b01, 1, Dm, 0, Da, Dn, Dd))
+// scalar Rd = Ra - Rn*Rm
+#define FMSUB_32(Sd, Sa, Sn, Sm) EMIT(FMADD_gen(0b00, 0, Sm, 1, Sa, Sn, Sd))
+// scalar Rd = Ra - Rn*Rm
+#define FMSUB_64(Dd, Da, Dn, Dm) EMIT(FMADD_gen(0b01, 0, Dm, 1, Da, Dn, Dd))
+// scalar Rd = -Ra + Rn*Rm
+#define FNMSUB_32(Sd, Sa, Sn, Sm) EMIT(FMADD_gen(0b00, 1, Sm, 1, Sa, Sn, Sd))
+// scalar Rd = -Ra + Rn*Rm
+#define FNMSUB_64(Dd, Da, Dn, Dm) EMIT(FMADD_gen(0b01, 1, Dm, 1, Da, Dn, Dd))
 
 #define FMLA_vector(Q, op, sz, Rm, Rn, Rd) ((Q)<<30 | 0b01110<<24 | (op)<<23 | (sz)<<22 | 1<<21 | (Rm)<<16 | 0b11001<<11 | 1<<10 | (Rn)<<5 | (Rd))
 // Vd += Vn*Vm
@@ -1465,6 +1484,10 @@ int convert_bitmask(uint64_t bitmask);
 #define VFMLAQS(Vd, Vn, Vm) EMIT(FMLA_vector(1, 0, 0, Vm, Vn, Vd))
 // Vd += Vn*Vm
 #define VFMLAQD(Vd, Vn, Vm) EMIT(FMLA_vector(1, 0, 1, Vm, Vn, Vd))
+// Vd -= Vn*Vm
+#define VFMLSQS(Vd, Vn, Vm) EMIT(FMLA_vector(1, 1, 0, Vm, Vn, Vd))
+// Vd -= Vn*Vm
+#define VFMLSQD(Vd, Vn, Vm) EMIT(FMLA_vector(1, 1, 1, Vm, Vn, Vd))
 
 // ZIP / UZP
 #define ZIP_gen(Q, size, Rm, op, Rn, Rd) ((Q)<<30 | 0b001110<<24 | (size)<<22 | (Rm)<<16 | (op)<<14 | 0b11<<12 | 0b10<<10 | (Rn)<<5 | (Rd))
@@ -1838,6 +1861,7 @@ int convert_bitmask(uint64_t bitmask);
 #define MOVI_vector(Q, op, abc, cmode, defgh, Rd) ((Q)<<30 | (op)<<29 | 0b0111100000<<19 | (abc)<<16 | (cmode)<<12 | 1<<10 | (defgh)<<5 | (Rd))
 #define MOVIQ_8(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVIQ_16(Rd, imm8, lsl8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
+#define MOVIQ_32(Rd, imm8) EMIT(MOVI_vector(1, 0, (((imm8)>>5)&0b111), 0b0000, ((imm8)&0b11111), Rd))
 #define MOVIQ_64(Rd, imm8) EMIT(MOVI_vector(1, 1, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_8(Rd, imm8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1110, ((imm8)&0b11111), Rd))
 #define MOVI_16(Rd, imm8, lsl8) EMIT(MOVI_vector(0, 0, (((imm8)>>5)&0b111), 0b1000|((lsl8)?0b10:0), ((imm8)&0b11111), Rd))
diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 9e8d10ab..3c2cdf7b 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1324,6 +1324,14 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         char s = (a.t==0b00)?'S':((a.t==0b01)?'D':'?');
         int n = (a.t==0)?1:2;
         snprintf(buff, sizeof(buff), "FM%s V%d.%d%c, V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"SUB":"ADD", Rd, n, s, Ra, n, s, Rn, n, s, Rm, n, s);
+        return buff;
+    }
+    // FNMADD
+    if(isMask(opcode, "00011111tt1mmmmmoaaaaannnnnddddd", &a)) {
+        char s = (a.t==0b00)?'S':((a.t==0b01)?'D':'?');
+        int n = (a.t==0)?1:2;
+        snprintf(buff, sizeof(buff), "FNM%s V%d.%d%c, V%d.%d%c, V%d.%d%c, V%d.%d%c", option?"SUB":"ADD", Rd, n, s, Ra, n, s, Rn, n, s, Rm, n, s);
+        return buff;
     }
     // FMLA
     if(isMask(opcode, "0Q001110of1mmmmm110011nnnnnddddd", &a)) {
diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
index 781fbfbc..d358597d 100644
--- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
+++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c
@@ -30,7 +30,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
 
     uint8_t opcode = F8;
     uint8_t nextop, u8;
-    uint8_t gd, ed;
+    uint8_t gd, ed, vd;
    uint8_t wback, wb1, wb2;
     uint8_t eb1, eb2, gb1, gb2;
     int32_t i32, i32_;
@@ -123,6 +123,50 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
             if(!vex.l) YMM0(gd);
             break;
 
+        case 0x17:
+            INST_NAME("VPTEST GX, EX");
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETGX(v0, 0);
+            if(MODREG) {
+                GETEX(v1, 0, 0);
+            } else {
+                v1 = fpu_get_scratch(dyn, ninst);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0xffe<<4, 15, rex, NULL, 0, 0);
+            }
+            v2 = fpu_get_scratch(dyn, ninst);
+            if(vex.l) {
+                GETGY(v0, 0, MODREG?((nextop&7)+(rex.b<<3)):-1, -1, -1);
+                if(MODREG) {
+                    GETEY(v1);
+                } else {
+                    VLDR128_U12(v1, ed, fixedaddress+16);
+                }
+            } else {
+                if(!MODREG)
+                    VLDR128_U12(v1, ed, fixedaddress);
+            }
+            IFX(X_ZF) {
+                VANDQ(v2, v0, v1);
+                UQXTN_32(v2, v2);
+                VMOVQDto(x2, v2, 0);
+                CMPSw_U12(x2, 0);
+                CSETw(x2, cEQ);
+                BFIw(xFlags, x2, F_ZF, 1);
+            }
+            IFX(X_CF) {
+                VBICQ(v2, v1, v0);
+                UQXTN_32(v2, v2);
+                VMOVQDto(x2, v2, 0);
+                CMPSw_U12(x2, 0);
+                CSETw(x2, cEQ);
+                BFIw(xFlags, x2, F_CF, 1);
+            }
+            IFX(X_AF|X_SF|X_OF|X_PF) {
+                MOV32w(x2, (1<