diff options
| author | wannacu <76616478+wannacu@users.noreply.github.com> | 2025-02-14 21:29:43 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-02-14 14:29:43 +0100 |
| commit | efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86 (patch) | |
| tree | da7f0ec7bc7a658e8b888b9f207c2fae93d6a194 /src | |
| parent | 91798194d51fe51eb236d91f9814c31511b5036a (diff) | |
| download | box64-efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86.tar.gz box64-efa1cbd49aa57ccc2c168fed2c4ec93e876f8e86.zip | |
[ARM64_DYNAREC] Add some opcodes (#2358)
* [ARM64_DYNAREC] Added 66 0F 3A 41 opcode
* [ARM64_DYNAREC] Added AVX.66.0F38 DB opcode
* [ARM64_DYNAREC] Added AVX.66.0F3A DF opcode
* [ARM64_DYNAREC] Added AVX.F2.0F38 F5 opcode
* [ARM64_DYNAREC] Added 66 F3 0F BC,B8 opcode
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_660f.c | 21 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_66f30f.c | 57 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c | 17 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c | 44 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c | 31 |
5 files changed, 167 insertions, 3 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c index 2a30bc9c..ec5223ac 100644 --- a/src/dynarec/arm64/dynarec_arm64_660f.c +++ b/src/dynarec/arm64/dynarec_arm64_660f.c @@ -1238,6 +1238,27 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n } break; + case 0x41: + INST_NAME("DPPD Gx, Ex, Ib"); + nextop = F8; + GETGX(q0, 1); + GETEX(q1, 0, 1); + u8 = F8; + v0 = fpu_get_scratch(dyn, ninst); + VFMULQD(v0, q0, q1); + // mask some, duplicate all, mask some + for(int i=0; i<2; ++i) + if(!(u8&(1<<(4+i)))) { + VMOVQDfrom(v0, i, xZR); + } + FADDPD(v0, v0); + VDUPQ_64(q0, v0, 0); + for(int i=0; i<2; ++i) + if(!(u8&(1<<i))) { + VMOVQDfrom(q0, i, xZR); + } + break; + case 0x44: INST_NAME("PCLMULQDQ Gx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_66f30f.c b/src/dynarec/arm64/dynarec_arm64_66f30f.c index 432a6646..88421c9f 100644 --- a/src/dynarec/arm64/dynarec_arm64_66f30f.c +++ b/src/dynarec/arm64/dynarec_arm64_66f30f.c @@ -55,6 +55,59 @@ uintptr_t dynarec64_66F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int #endif switch(opcode) { + case 0xB8: + INST_NAME("POPCNT Gw, Ew"); + SETFLAGS(X_ALL, SF_SET); + SET_DFNONE(); + nextop = F8; + GETGW(x2); + GETEW(x1, 0); + v1 = fpu_get_scratch(dyn, ninst); + VEORQ(v1, v1, v1); + VMOVQDfrom(v1, 0, ed); + CNT_8(v1, v1); + UADDLV_8(v1, v1); + VMOVHto(gd, v1, 0); + IFX(X_ALL) { + IFX(X_AF|X_PF|X_SF|X_OF|X_CF) { + MOV32w(x1, (1<<F_OF) | (1<<F_SF) | (1<<F_ZF) | (1<<F_AF) | (1<<F_CF) | (1<<F_PF)); + BICw(xFlags, xFlags, x1); + } + IFX(X_ZF) { + CMPSw_U12(gd, 0); + IFNATIVE(NF_EQ) {} + else { + CSETw(x1, cEQ); + BFIw(xFlags, x1, F_ZF, 1); + } + } + } + GWBACK; + break; + + case 0xBC: + INST_NAME("TZCNT Gw, Ew"); + SETFLAGS(X_CF|X_ZF, SF_SUBSET); + SET_DFNONE(); + nextop = F8; + GETEW(x1, 0); + GETGW(x2); + TSTxw_REG(ed, ed); + IFX(X_CF) { + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_CF, 1); // CF = is source 0? 
+ } + RBITw(x3, ed); // reverse + CLZw(gd, x3); // x2 gets leading 0 == TZCNT + MOV32w(x3, 16); + CSELw(gd, x3, gd, cEQ); // if src is zero, use bit width as res + IFX(X_ZF) { + TSTxw_REG(gd, gd); + CSETw(x3, cEQ); + BFIw(xFlags, x3, F_ZF, 1); // ZF = is dest 0? + } + GWBACK; + break; case 0xBD: INST_NAME("LZCNT Gw, Ew"); @@ -68,14 +121,14 @@ uintptr_t dynarec64_66F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int CSETw(x3, cEQ); BFIw(xFlags, x3, F_CF, 1); // CF = is source 0? } - LSLw_IMM(ed, ed, 16); CLZw(gd, ed); + SUBw_U12(gd, gd, 16); // sub zero cnt of high word IFX(X_ZF) { TSTxw_REG(gd, gd); CSETw(x3, cEQ); BFIw(xFlags, x3, F_ZF, 1); // ZF = is dest 0? } - EWBACK; + GWBACK; break; default: diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c index 781fc2a9..4f2b1c28 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f38.c @@ -1866,6 +1866,23 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(!vex.l) YMM0(gd); break; + case 0xDB: + INST_NAME("VAESIMC Gx, Ex"); + nextop = F8; + GETGX_empty_EX(v0, v1, 0); + if(arm64_aes) { + AESIMC(v0, v1); + } else { + if(v0!=v1) { + VMOVQ(v0, v1); + } + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); + CALL(native_aesimc, -1); + } + if(!vex.l) YMM0(gd); + break; + case 0xDC: INST_NAME("VAESENC Gx, Vx, Ex"); // AES-NI nextop = F8; diff --git a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c index a2111a4e..1096b528 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_66_0f3a.c @@ -632,6 +632,26 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(!vex.l) YMM0(gd); break; + case 0x41: + INST_NAME("VDPPD Gx, Vx, Ex, Ib"); + nextop = F8; + GETGX_empty_VXEX(v0, v1, v2, 0); + u8 = F8; + VFMULQD(v0, v1, v2); + // mask some, duplicate all, mask some + 
for(int i=0; i<2; ++i) + if(!(u8&(1<<(4+i)))) { + VMOVQDfrom(v0, i, xZR); + } + FADDPD(v0, v0); + VDUPQ_64(v0, v0, 0); + for(int i=0; i<2; ++i) + if(!(u8&(1<<i))) { + VMOVQDfrom(v0, i, xZR); + } + if(!vex.l) YMM0(gd); + break; + case 0x44: INST_NAME("PCLMULQDQ Gx, Vx, Ex, Ib"); nextop = F8; @@ -784,6 +804,30 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip if(!vex.l) YMM0(gd); break; + case 0xDF: + INST_NAME("VAESKEYGENASSIST Gx, Ex, Ib"); + nextop = F8; + GETG; + sse_forget_reg(dyn, ninst, gd); + MOV32w(x1, gd); // gx + if(MODREG) { + ed = (nextop&7)+(rex.b<<3); + sse_forget_reg(dyn, ninst, ed); + MOV32w(x2, ed); + MOV32w(x3, 0); //p = NULL + } else { + MOV32w(x2, 0); + addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 1); + if(ed!=x3) { + MOVx_REG(x3, ed); + } + } + u8 = F8; + MOV32w(x4, u8); + CALL(native_aeskeygenassist, -1); + if(!vex.l) YMM0(gd); + break; + default: DEFAULT; } diff --git a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c index 191b3581..fbee03e3 100644 --- a/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c +++ b/src/dynarec/arm64/dynarec_arm64_avx_f2_0f38.c @@ -60,7 +60,36 @@ uintptr_t dynarec64_AVX_F2_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip rex_t rex = vex.rex; switch(opcode) { - + case 0xF5: + INST_NAME("PDEP Gd, Ed, Vd"); + nextop = F8; + GETGD; + GETED(0); + GETVD; + if(gd==ed || gd==vd) { + gb1 = gd; + gd = x4; + } else { + gb1 = 0; + } + // x3 = mask of mask, loop while not 0 + MOV32w(gd, 0); + MOV64x(x2, 1); + MOV64x(x3, 1); + MARK; + TSTxw_REG(ed, x3); + B_MARK2(cEQ); + TSTxw_REG(vd, x2); + B_MARK3(cEQ); + ORRxw_REG(gd, gd, x3); + MARK3; + LSLxw_IMM(x2, x2, 1); + MARK2; + LSLxw_IMM(x3, x3, 1); + CBNZxw_MARK(x3); + if(gb1) + MOVxw_REG(gb1, gd); + break; case 0xF6: INST_NAME("MULX Gd, Vd, Ed (,RDX)"); nextop = F8; |