From d01970f731a010aa30a691018d81d134aae35b1f Mon Sep 17 00:00:00 2001
From: ptitSeb
Date: Mon, 3 Jun 2024 20:28:25 +0200
Subject: [ARM64_DYNAREC] Added AVX.66.0F3A 02/08-09/0E/14/20/40/4A-4B opcodes

---
 src/dynarec/arm64/arm64_printer.c             |  13 ++
 src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 187 ++++++++++++++++++++++++++
 2 files changed, 200 insertions(+)

diff --git a/src/dynarec/arm64/arm64_printer.c b/src/dynarec/arm64/arm64_printer.c
index 3c2cdf7b..411cf4f2 100644
--- a/src/dynarec/arm64/arm64_printer.c
+++ b/src/dynarec/arm64/arm64_printer.c
@@ -1029,6 +1029,19 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "MOVI V%d.%s, #0x%x", Rd, Vd, imm<<8);
         return buff;
     }
+    // MOV immediate 64bits
+    if(isMask(opcode, "0Q10111100000iii111001iiiiiddddd", &a)) {
+        uint64_t tmp64u = 0;
+        for(int i=0; i<8; ++i)
+            if((imm)&(1<<i))
[... tail of this hunk missing ...]

diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c
[... index line, hunk header and leading context missing ...]
             VMOVeD(v1, 0, ((u8>>5)&1)?q1:q0, (u8>>4)&1);
             VMOVeD(v1, 1, ((u8>>7)&1)?q1:q0, (u8>>6)&1);
             break;
+        case 0x02:
+            INST_NAME("VPBLENDD Gx, Vx, Ex, u8");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) { GETGX_empty_VXEX(v0, v2, v1, 1); u8 = F8;} else { GETGY_empty_VYEY(v0, v2, v1); u8 >>=4;}
+                wb1 = 0; // mask
+                for(int i=0; i<4; ++i)
+                    if(u8&(1<<i))
[... remainder of case 0x02 and cases 0x08, 0x09, 0x0E, 0x14, 0x20 and 0x40 missing ...]
+        case 0x4A:
+            INST_NAME("VBLENDVPS Gx, Vx, Ex, XMMImm8");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            u8 = geted_ib(dyn, addr, ninst, nextop)>>4;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) {
+                    q1 = sse_get_reg(dyn, ninst, x1, u8, 0);
+                    GETGX_empty_VXEX(v0, v2, v1, 1);
+                    F8;
+                } else {
+                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    if(MODREG)
+                        v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, u8);
+                    else
+                        VLDR128_U12(v1, ed, fixedaddress+16);
+                    q1 = ymm_get_reg(dyn, ninst, x1, u8, 0, vex.v, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                }
+                VSSHRQ_32(q0, q1, 31); // create mask
+                if(v0==v1)
+                    VBIFQ(v0, v2, q0);
+                else {
+                    if(v0!=v2) VMOVQ(v0, v2);
+                    VBITQ(v0, v1, q0);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
+        case 0x4B:
+            INST_NAME("VBLENDVPD Gx, Vx, Ex, XMMImm8");
+            nextop = F8;
+            q0 = fpu_get_scratch(dyn, ninst);
+            u8 = geted_ib(dyn, addr, ninst, nextop)>>4;
+            for(int l=0; l<1+vex.l; ++l) {
+                if(!l) {
+                    q1 = sse_get_reg(dyn, ninst, x1, u8, 0);
+                    GETGX_empty_VXEX(v0, v2, v1, 1);
+                    F8;
+                } else {
+                    v2 = ymm_get_reg(dyn, ninst, x1, vex.v, 0, gd, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    if(MODREG)
+                        v1 = ymm_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0, gd, vex.v, u8);
+                    else
+                        VLDR128_U12(v1, ed, fixedaddress+16);
+                    q1 = ymm_get_reg(dyn, ninst, x1, u8, 0, vex.v, gd, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                    v0 = ymm_get_reg_empty(dyn, ninst, x1, gd, vex.v, u8, (MODREG)?((nextop&7)+(rex.b<<3)):-1);
+                }
+                VSSHRQ_64(q0, q1, 63); // create mask
+                if(v0==v1)
+                    VBIFQ(v0, v2, q0);
+                else {
+                    if(v0!=v2) VMOVQ(v0, v2);
+                    VBITQ(v0, v1, q0);
+                }
+            }
+            if(!vex.l) YMM0(gd);
+            break;
         default:
             DEFAULT;
--
cgit 1.4.1
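A note on the arm64_printer.c hunk, whose tail is missing above: the isMask() pattern matches the 64-bit MOVI encoding (op=1, cmode=1110), in which each of the eight immediate bits expands to one full byte of a 64-bit constant (replicated to both halves of the vector when Q is set). The printer presumably gains this form because the new blend cases materialize their masks with it. A minimal stand-alone sketch of that expansion; function and variable names here are mine, and the lost lines may differ in detail:

    #include <stdint.h>
    #include <stdio.h>

    // Expand the 8-bit MOVI immediate: bit i of imm8 becomes byte i of the
    // 64-bit constant, either all-ones or all-zeros.
    static uint64_t movi_expand_imm8(uint8_t imm8)
    {
        uint64_t v = 0;
        for (int i = 0; i < 8; ++i)
            if (imm8 & (1u << i))
                v |= 0xffULL << (i * 8);
        return v;
    }

    int main(void)
    {
        // 0x0f -> 00000000ffffffff: the low four bytes set.
        printf("%016llx\n", (unsigned long long)movi_expand_imm8(0x0f));
        return 0;
    }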
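For reference, the x86 semantics that case 0x02 implements: VPBLENDD takes each 32-bit element from the second source when the matching immediate bit is set, otherwise from the first. The low nibble drives the low 128-bit lane and, as the visible u8 >>=4; shows, the high nibble drives the upper lane. A scalar model of the 256-bit form, assuming nothing about box64 internals (names are mine):

    #include <stdint.h>

    // Reference semantics of 256-bit VPBLENDD: bit i of imm8 selects
    // dword i from src2, otherwise dword i comes from src1.
    static void vpblendd256(uint32_t dst[8], const uint32_t src1[8],
                            const uint32_t src2[8], uint8_t imm8)
    {
        for (int i = 0; i < 8; ++i)
            dst[i] = ((imm8 >> i) & 1) ? src2[i] : src1[i];
    }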
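Cases 0x4A/0x4B map x86's register-mask blends onto NEON with a classic trick: VSSHRQ_32(q0, q1, 31) (VSSHRQ_64(q0, q1, 63) for the packed-double case) arithmetic-shifts each element so its sign bit fills the whole lane, yielding an all-ones/all-zeros mask that VBITQ/VBIFQ then use to merge the two sources; when the destination already aliases Ex (v0==v1), VBIFQ inserts the other operand where the mask is clear, saving a move. The same idea expressed with NEON intrinsics, as an illustration of the technique rather than the emitted code:

    #include <arm_neon.h>

    // BLENDVPS semantics: each 32-bit lane comes from b when the sign bit
    // of the corresponding lane of mask is set, else from a.
    static float32x4_t blendvps(float32x4_t a, float32x4_t b, float32x4_t mask)
    {
        // Arithmetic shift right by 31 replicates the sign bit across the
        // lane, producing 0xFFFFFFFF or 0x00000000 per element.
        uint32x4_t m = vreinterpretq_u32_s32(
            vshrq_n_s32(vreinterpretq_s32_f32(mask), 31));
        // Bitwise select: lanes where m is all-ones are taken from b.
        return vbslq_f32(m, b, a);
    }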