about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: phorcys <phorcys@126.com> 2025-08-15 20:16:58 +0800
committer: GitHub <noreply@github.com> 2025-08-15 14:16:58 +0200
commit: 270ce3750e619cd374f7a93fef2dd096f3929715 (patch)
tree: 0b98de3f727ca815c2d44e93b4cc91170c12a82c /src
parent: 83e2427bfef1e675018205a21e72c9438e3814ab (diff)
download: box64-270ce3750e619cd374f7a93fef2dd096f3929715.tar.gz
box64-270ce3750e619cd374f7a93fef2dd096f3929715.zip
[LA64_DYNAREC] Add la64 BMI/BMI2 ops. (#2933)
VEX.0F.38  BLSR, BLSMSK, BLSI, BZHI, BEXTR
VEX.F2.0F.38 PDEP, MULX
VEX.F3.0F.38 PEXT
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_0f38.c 152
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f2_0f38.c 53
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_f3_0f38.c 30
3 files changed, 233 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_0f38.c b/src/dynarec/la64/dynarec_la64_avx_0f38.c
index fd2bcf5d..78545676 100644
--- a/src/dynarec/la64/dynarec_la64_avx_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_0f38.c
@@ -71,8 +71,8 @@ uintptr_t dynarec64_AVX_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             CLEAR_FLAGS(x6);
             IFX (X_SF) {
                 SRLI_D(x6, gd, rex.w ? 63 : 31);
-                BEQZ(x6, 8);
-                ORI(xFlags, xFlags, 1 << F_SF);
+                SLLI_D(x6, x6, F_SF);
+                OR(xFlags, xFlags, x6);
             }
             IFX (X_ZF) {
                 BNEZ(gd, 8);
@@ -80,6 +80,154 @@ uintptr_t dynarec64_AVX_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             }
             SPILL_EFLAGS();
             break;
+        case 0xF3: // bits 5:3 of the ModRM byte select BLSR (1), BLSMSK (2) or BLSI (3)
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 1: // BLSR: Vd = Ed & (Ed - 1); CF is set when the source is zero
+                    INST_NAME("BLSR Vd, Ed");
+                    SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                    GETED(0);
+                    GETVD;
+                    CLEAR_FLAGS(x6);
+                    IFX (X_CF) {
+                        // NOTE(review): ed may be a guest register used as-is (confirm in GETED), so a 32-bit source is tested on bits 31:0 only.
+                        if (!rex.w) { BSTRPICK_D(x4, ed, 31, 0); }
+                        BNEZ(rex.w ? ed : x4, 8); // skip the ORI when the source is non-zero
+                        ORI(xFlags, xFlags, 1 << F_CF);
+                    }
+                    ADDIxw(x3, ed, -1); // x3 = source - 1
+                    AND(vd, ed, x3);
+                    if (!rex.w) { BSTRPICK_D(vd, vd, 31, 0); } // 32-bit result is zero-extended
+                    IFX (X_ZF) {
+                        BNEZ(vd, 8);
+                        ORI(xFlags, xFlags, 1 << F_ZF);
+                    }
+                    IFX (X_SF) {
+                        BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 63 : 31); // x5 = top bit of the result
+                        SLLI_D(x5, x5, F_SF);
+                        OR(xFlags, xFlags, x5);
+                    }
+                    SPILL_EFLAGS();
+                    break;
+                case 2: // BLSMSK: Vd = Ed ^ (Ed - 1); CF is set when the source is zero, ZF stays cleared
+                    INST_NAME("BLSMSK Vd, Ed");
+                    SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                    GETED(0);
+                    GETVD;
+                    CLEAR_FLAGS(x6);
+                    IFX (X_CF) {
+                        // NOTE(review): ed may be a guest register used as-is (confirm in GETED), so a 32-bit source is tested on bits 31:0 only.
+                        if (!rex.w) { BSTRPICK_D(x4, ed, 31, 0); }
+                        BNEZ(rex.w ? ed : x4, 8); // skip the ORI when the source is non-zero
+                        ORI(xFlags, xFlags, 1 << F_CF);
+                    }
+                    ADDIxw(x3, ed, -1); // x3 = source - 1
+                    XOR(vd, ed, x3);
+                    if (!rex.w) { BSTRPICK_D(vd, vd, 31, 0); } // 32-bit result is zero-extended
+                    IFX (X_SF) {
+                        BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 63 : 31); // x5 = top bit of the result
+                        SLLI_D(x5, x5, F_SF);
+                        OR(xFlags, xFlags, x5);
+                    }
+                    SPILL_EFLAGS();
+                    break;
+                case 3: // BLSI: Vd = Ed & -Ed; CF is set when the source is non-zero
+                    INST_NAME("BLSI Vd, Ed");
+                    SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+                    GETED(0);
+                    GETVD;
+                    CLEAR_FLAGS(x6);
+                    SUBxw(x3, xZR, ed); // x3 = -source, computed at operand width
+                    IFX (X_CF) {
+                        BEQZ(x3, 8); // x3 is zero exactly when the operand-width source is zero; ed itself may carry stale upper bits
+                        ORI(xFlags, xFlags, 1 << F_CF);
+                    }
+                    AND(vd, ed, x3);
+                    if (!rex.w) {
+                        BSTRPICK_D(vd, vd, 31, 0); // 32-bit result is zero-extended
+                    }
+                    IFX (X_ZF) {
+                        BNEZ(vd, 8);
+                        ORI(xFlags, xFlags, 1 << F_ZF);
+                    }
+                    IFX (X_SF) {
+                        BSTRPICK_D(x5, vd, rex.w ? 63 : 31, rex.w ? 63 : 31); // x5 = top bit of the result
+                        SLLI_D(x5, x5, F_SF);
+                        OR(xFlags, xFlags, x5);
+                    }
+                    SPILL_EFLAGS();
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+
+        case 0xF5: // BZHI: copy Ed into Gd with the bits at index N and above cleared, N = Vd[7:0]
+            INST_NAME("BZHI Gd, Ed, Vd");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETGD;
+            GETED(0);
+            GETVD;
+            CLEAR_FLAGS(x6);
+            BSTRPICK_D(x4, vd, 7, 0); // x4 = N, the low byte of Vd
+            MOV64x(x5, rex.w ? 64 : 32); // x5 = operand size in bits
+            BGE_MARK(x4, x5); // N >= operand size: take the MARK path below
+            ADDI_D(x6, xZR, -1);
+            SLL_D(x6, x6, x4); // x6 = ones in bit N and above
+            ANDN(gd, ed, x6); // gd = ed & ~x6, i.e. only the bits below N survive
+            B_MARK2_nocond;
+            MARK; // out-of-range index: the source is copied unchanged and CF is raised
+            OR(gd, ed, ed);
+            IFX (X_CF) {
+                ORI(xFlags, xFlags, 1 << F_CF);
+            }
+            MARK2; // both paths join here
+            if (!rex.w) {
+                BSTRPICK_D(gd, gd, 31, 0); // 32-bit result is zero-extended
+            }
+            IFX (X_ZF) {
+                BNEZ(gd, 8); // skip the ORI when the result is non-zero
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            IFX (X_SF) {
+                BSTRPICK_D(x5, gd, rex.w ? 63 : 31, rex.w ? 63 : 31); // x5 = top bit of the result
+                SLLI_D(x5, x5, F_SF);
+                OR(xFlags, xFlags, x5);
+            }
+            SPILL_EFLAGS();
+            break;
+
+        case 0xF7: // BEXTR: Gd = LEN bits of Ed starting at bit START; control word is Vd (START = Vd[7:0], LEN = Vd[15:8])
+            INST_NAME("BEXTR Gd, Vd, Ed");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETGD;
+            GETED(0);
+            GETVD;
+            BSTRPICK_D(x4, vd, 7, 0);  // start
+            BSTRPICK_D(x3, vd, 15, 8); // length
+            ADDI_D(x5, xZR, 0); // default result is 0 (used when length == 0 or start >= operand size)
+            BEQZ_MARK(x3);
+            MOV64xw(x6, rex.w ? 64 : 32); // x6 = operand size in bits
+            BGE_MARK(x4, x6);
+            SRLxw(x5, ed, x4); // x5 = source >> start; done before the length check so an oversized length keeps every remaining bit
+            BLT_MARK(x6, x3); // length > operand size: nothing to trim, result is source >> start
+            SUBxw(x6, x6, x3); // x6 = operand size - length = number of high bits to drop
+            SLLxw(x5, x5, x6);
+            SRLxw(x5, x5, x6); // shift up then back down to clear the bits above the field
+            MARK;
+            OR(gd, x5, x5);
+            if (!rex.w) {
+                BSTRPICK_D(gd, gd, 31, 0); // 32-bit result is zero-extended
+            }
+            CLEAR_FLAGS(x6);
+            IFX (X_ZF) {
+                BNEZ(gd, 8); // skip the ORI when the result is non-zero
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            SPILL_EFLAGS();
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c
index f8a29542..74dcb59a 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f38.c
@@ -57,6 +57,59 @@ uintptr_t dynarec64_AVX_F2_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
     rex_t rex = vex.rex;
 
     switch (opcode) {
+        case 0xF5: // PDEP: deposit successive low bits of Vd at the bit positions set in mask Ed
+            INST_NAME("PDEP Gd, Vd, Ed");
+            nextop = F8;
+            GETGD;
+            GETVD;
+            GETED(0);
+            if (gd == ed || gd == vd) { // destination aliases an input: build the result in x6
+                gb1 = gd;
+                gd = x6;
+            } else {
+                gb1 = 0;
+            }
+            MOV64x(gd, 0);
+            MOV64x(x3, 1); // x3 = one-hot cursor over the source bits
+            MOV64x(x4, 1); // x4 = one-hot cursor over the mask bits
+            MARK;
+            AND(x5, ed, x4);
+            BEQZ_MARK2(x5); // mask bit clear: advance the mask cursor only
+            AND(x5, vd, x3);
+            BEQZ_MARK3(x5); // source bit clear: nothing to deposit
+            OR(gd, gd, x4);
+            MARK3;
+            SLLI_D(x3, x3, 1); // 64-bit shift: keeps the cursor one-hot (a 32-bit shift sign-extends bit 31)
+            MARK2;
+            SLLI_D(x4, x4, 1);
+            if (!rex.w) { BSTRPICK_D(x4, x4, 31, 0); } // 32-bit operand: cursor drops to 0 after bit 31, ending the loop
+            BNEZ_MARK(x4);
+            if (gb1) { OR(gb1, gd, gd); } // copy the temporary back to the real destination
+            break;
+        case 0xF6: // MULX: unsigned RDX * Ed, high half to Gd, low half to Vd (high half wins when Gd == Vd)
+            INST_NAME("MULX Gd, Vd, Ed (,RDX)");
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            GETVD;
+            // Compute the high half in x3 when writing Gd directly would clobber an input of the low-half multiply.
+            if ((gd == xRDX) || (gd == ed) || (gd == vd))
+                gb1 = x3;
+            else
+                gb1 = gd;
+            if (rex.w) {
+                MULH_DU(gb1, xRDX, ed);
+                if (gd != vd) { MUL_D(vd, xRDX, ed); }
+                if (gb1 == x3) { OR(gd, gb1, gb1); }
+            } else {
+                MULH_WU(gb1, xRDX, ed);
+                if (gd != vd) {
+                    MUL_W(vd, xRDX, ed);
+                    BSTRPICK_D(vd, vd, 31, 0); // MUL.W sign-extends; a 32-bit destination must be zero-extended
+                }
+                BSTRPICK_D(gd, gb1, 31, 0); // zero-extend the high half into Gd (also moves it out of x3 when used)
+            }
+            break;
         case 0xF7:
             INST_NAME("SHRX Gd, Ed, Vd");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c
index 8e8e6781..7dfaf00b 100644
--- a/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f38.c
@@ -57,6 +57,36 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
     rex_t rex = vex.rex;
 
     switch (opcode) {
+        case 0xF5: // PEXT: gather the bits of Vd selected by mask Ed into the low bits of Gd
+            INST_NAME("PEXT Gd, Vd, Ed");
+            nextop = F8;
+            GETGD;
+            GETVD;
+            GETED(0);
+            if (gd == ed || gd == vd) { // destination aliases an input: build the result in x6
+                gb1 = gd;
+                gd = x6;
+            } else {
+                gb1 = 0;
+            }
+            MOV64x(gd, 0);
+            MOV64x(x3, 1); // x3 = one-hot cursor over the destination bits
+            MOV64x(x4, 1); // x4 = one-hot cursor over the mask/source bits
+            MARK;
+            AND(x5, ed, x4);
+            BEQZ_MARK2(x5); // mask bit clear: advance the mask cursor only
+            AND(x5, vd, x4);
+            BEQZ_MARK3(x5); // selected source bit is clear: leave the destination bit at 0
+            OR(gd, gd, x3);
+            MARK3;
+            SLLI_D(x3, x3, 1); // 64-bit shift: keeps the cursor one-hot (a 32-bit shift sign-extends bit 31)
+            MARK2;
+            SLLI_D(x4, x4, 1);
+            if (!rex.w) { BSTRPICK_D(x4, x4, 31, 0); } // 32-bit operand: cursor drops to 0 after bit 31, ending the loop
+            BNEZ_MARK(x4);
+            if (gb1) { OR(gb1, gd, gd); } // copy the temporary back to the real destination
+            break;
+
         case 0xF7:
             INST_NAME("SARX Gd, Ed, Vd");
             nextop = F8;