diff options
| author | phorcys <phorcys@126.com> | 2025-08-15 20:16:58 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-15 14:16:58 +0200 |
| commit | 270ce3750e619cd374f7a93fef2dd096f3929715 (patch) | |
| tree | 0b98de3f727ca815c2d44e93b4cc91170c12a82c /src | |
| parent | 83e2427bfef1e675018205a21e72c9438e3814ab (diff) | |
| download | box64-270ce3750e619cd374f7a93fef2dd096f3929715.tar.gz box64-270ce3750e619cd374f7a93fef2dd096f3929715.zip | |
[LA64_DYNAREC] Add la64 BMI/BMI2 ops. (#2933)
VEX.0F.38: BLSR, BLSMSK, BLSI, BZHI, BEXTR; VEX.F2.0F.38: PDEP, MULX; VEX.F3.0F.38: PEXT
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_0f38.c | 152 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f38.c | 53 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f38.c | 30 |
3 files changed, 233 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f38.c b/src/dynarec/la64/dynarec_la64_avx_0f38.c index fd2bcf5d..78545676 100644 --- a/src/dynarec/la64/dynarec_la64_avx_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_0f38.c @@ -71,8 +71,8 @@ uintptr_t dynarec64_AVX_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, CLEAR_FLAGS(x6); IFX (X_SF) { SRLI_D(x6, gd, rex.w ? 63 : 31); - BEQZ(x6, 8); - ORI(xFlags, xFlags, 1 << F_SF); + SLLI_D(x6, x6, F_SF); + OR(xFlags, xFlags, x6); } IFX (X_ZF) { BNEZ(gd, 8); @@ -80,6 +80,154 @@ uintptr_t dynarec64_AVX_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, } SPILL_EFLAGS(); break; + case 0xF3: + nextop = F8; + switch ((nextop >> 3) & 7) { + case 1: + INST_NAME("BLSR Vd, Ed"); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + GETED(0); + GETVD; + CLEAR_FLAGS(x6); + IFX (X_CF) { + BNEZ(ed, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + ADDIxw(x3, ed, -1); + AND(vd, ed, x3); + if (!rex.w) { + BSTRPICK_D(vd, vd, 31, 0); + } + IFX (X_ZF) { + BNEZ(vd, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_SF) { + BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 63 : 31); + SLLI_D(x5, x5, F_SF); + OR(xFlags, xFlags, x5); + } + SPILL_EFLAGS(); + break; + case 2: + INST_NAME("BLSMSK Vd, Ed"); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + GETED(0); + GETVD; + CLEAR_FLAGS(x6); + IFX (X_CF) { + BNEZ(ed, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + ADDIxw(x3, ed, -1); + XOR(vd, ed, x3); + if (!rex.w) { + BSTRPICK_D(vd, vd, 31, 0); + } + IFX (X_SF) { + BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 
63 : 31); + SLLI_D(x5, x5, F_SF); + OR(xFlags, xFlags, x5); + } + SPILL_EFLAGS(); + break; + case 3: + INST_NAME("BLSI Vd, Ed"); + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + GETED(0); + GETVD; + CLEAR_FLAGS(x6); + IFX (X_CF) { + BEQZ(ed, 8); + ORI(xFlags, xFlags, 1 << F_CF); + } + SUBxw(x3, xZR, ed); + AND(vd, ed, x3); + if (!rex.w) { + BSTRPICK_D(vd, vd, 31, 0); + } + IFX (X_ZF) { + BNEZ(vd, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_SF) { + BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 63 : 31); + SLLI_D(x5, x5, F_SF); + OR(xFlags, xFlags, x5); + } + SPILL_EFLAGS(); + break; + default: + DEFAULT; + } + break; + + case 0xF5: + INST_NAME("BZHI Gd, Ed, Vd"); + nextop = F8; + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + GETGD; + GETED(0); + GETVD; + CLEAR_FLAGS(x6); + BSTRPICK_D(x4, vd, 7, 0); + MOV64x(x5, rex.w ? 64 : 32); + BGE_MARK(x4, x5); + ADDI_D(x6, xZR, -1); + SLL_D(x6, x6, x4); + ANDN(gd, ed, x6); + B_MARK2_nocond; + MARK; + OR(gd, ed, ed); + IFX (X_CF) { + ORI(xFlags, xFlags, 1 << F_CF); + } + MARK2; + if (!rex.w) { + BSTRPICK_D(gd, gd, 31, 0); + } + IFX (X_ZF) { + BNEZ(gd, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + IFX (X_SF) { + BSTRPICK_D(x5, gd, rex.w ? 63 : 31, rex.w ? 63 : 31); + SLLI_D(x5, x5, F_SF); + OR(xFlags, xFlags, x5); + } + SPILL_EFLAGS(); + break; + + case 0xF7: + INST_NAME("BEXTR Gd, Vd, Ed"); + nextop = F8; + SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION); + GETGD; + GETED(0); + GETVD; + BSTRPICK_D(x4, vd, 7, 0); // start + BSTRPICK_D(x3, vd, 15, 8); // length + ADDI_D(x5, xZR, 0); + BEQZ_MARK(x3); + MOV64xw(x6, rex.w ? 
64 : 32); + BGE_MARK(x4, x6); + BLT_MARK(x6, x3); + SRLxw(x5, ed, x4); + SUBxw(x6, x6, x3); + SLLxw(x5, x5, x6); + SRLxw(x5, x5, x6); + MARK; + OR(gd, x5, x5); + if (!rex.w) { + BSTRPICK_D(gd, gd, 31, 0); + } + CLEAR_FLAGS(x6); + IFX (X_ZF) { + BNEZ(gd, 8); + ORI(xFlags, xFlags, 1 << F_ZF); + } + SPILL_EFLAGS(); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c index f8a29542..74dcb59a 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c @@ -57,6 +57,59 @@ uintptr_t dynarec64_AVX_F2_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i rex_t rex = vex.rex; switch (opcode) { + case 0xF5: + INST_NAME("PDEP Gd, Vd, Ed"); + nextop = F8; + GETGD; + GETVD; + GETED(0); + if (gd == ed || gd == vd) { + gb1 = gd; + gd = x6; + } else { + gb1 = 0; + } + MOV64x(gd, 0); + MOV64x(x3, 1); + MOV64x(x4, 1); + MARK; + AND(x5, ed, x4); + BEQZ_MARK2(x5); + AND(x5, vd, x3); + BEQZ_MARK3(x5); + OR(gd, gd, x4); + MARK3; + SLLIxw(x3, x3, 1); + MARK2; + SLLIxw(x4, x4, 1); + BNEZ_MARK(x4); + if (gb1) + OR(gb1, gd, gd); + break; + case 0xF6: + INST_NAME("MULX Gd, Vd, Ed (,RDX)"); + nextop = F8; + GETGD; + GETED(0); + GETVD; + if ((gd == xRDX) || (gd == ed) || (gd == vd)) + gb1 = x3; + else + gb1 = gd; + if (rex.w) { + MULH_DU(gb1, xRDX, ed); + if (gd != vd) { MUL_D(vd, xRDX, ed); } + if (gb1 == x3) { + OR(gd, gb1, gb1); + } + } else { + MULH_WU(gb1, xRDX, ed); + if (gd != vd) { MUL_W(vd, xRDX, ed); } + } + if (gb1 == x3) { + BSTRINS_D(gd, gb1, 31, 0); + } + break; case 0xF7: INST_NAME("SHRX Gd, Ed, Vd"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c index 8e8e6781..7dfaf00b 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c @@ -57,6 +57,36 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i rex_t rex = 
vex.rex; switch (opcode) { + case 0xF5: + INST_NAME("PEXT Gd, Vd, Ed"); + nextop = F8; + GETGD; + GETVD; + GETED(0); + if (gd == ed || gd == vd) { + gb1 = gd; + gd = x6; + } else { + gb1 = 0; + } + MOV64x(gd, 0); + MOV64x(x3, 1); + MOV64x(x4, 1); + MARK; + AND(x5, ed, x4); + BEQZ_MARK2(x5); + AND(x5, vd, x4); + BEQZ_MARK3(x5); + OR(gd, gd, x3); + MARK3; + SLLIxw(x3, x3, 1); + MARK2; + SLLIxw(x4, x4, 1); + BNEZ_MARK(x4); + if (gb1) + OR(gb1, gd, gd); + break; + case 0xF7: INST_NAME("SARX Gd, Ed, Vd"); nextop = F8; |