author    phorcys <phorcys@126.com>  2025-07-14 14:54:16 +0800
committer GitHub <noreply@github.com>  2025-07-14 08:54:16 +0200
commit    ce08e8e27f7fda2ff2c02af215f3b8e16d3f0576 (patch)
tree      a82ec9f1537c149ba6db3bf5a07d40d71dfc6936
parent    6960cfa56fd786bc02c509cf62bcbc815fc672b1 (diff)
[LA64_DYNAREC] Add la64 avx shift ops. (#2806)
*  VEX.66.0f    VPSRLW/VPSRLD/VPSRLQ/VPSRAW/VPSRAD/VPSLLW/VPSLLD/VPSLLQ
*  VEX.66.0f.38 VPSRLVD/VPSRLVQ/VPSRAVD/VPSLLVD/VPSLLVQ
 src/dynarec/la64/dynarec_la64_avx_66_0f.c   | 90
 src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 42
 src/dynarec/la64/la64_emitter.h             | 74
 3 files changed, 205 insertions(+), 1 deletion(-)
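
For reference, the x86 semantics targeted by the new handlers: in the VEX.66.0F forms (VPSRLW/D/Q, VPSRAW/D, VPSLLW/D/Q) the shift count is the low 64 bits of the second source operand and applies uniformly to every lane; logical shifts produce zero when the count exceeds the element width, and arithmetic shifts behave as if the count were clamped to width-1. A rough per-lane C model (helper names are illustrative, not part of the patch):

#include <stdint.h>

/* VPSRLW: logical right shift of a 16-bit lane; a count above 15 zeroes the lane. */
static inline uint16_t psrlw_lane(uint16_t x, uint64_t count)
{
    return (count > 15) ? 0 : (uint16_t)(x >> count);
}

/* VPSRAW: arithmetic right shift; the count is clamped to 15 so sign bits fill the lane
   (assumes >> on a negative int is an arithmetic shift, as on common compilers). */
static inline int16_t psraw_lane(int16_t x, uint64_t count)
{
    if (count > 15) count = 15;
    return (int16_t)(x >> count);
}
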
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 0c707a28..26b6f684 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -457,6 +457,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0xD1:
+            INST_NAME("VPSRLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(H, d0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VSRLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD2:
+            INST_NAME("VPSRLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(W, d0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VSRLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD3:
+            INST_NAME("VPSRLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSRLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xD6:
             INST_NAME("VMOVD Ex, Gx");
             nextop = F8;
@@ -499,6 +535,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VANDN_Vxy(v0, v1, v2);
             break;
+        case 0xE1:
+            INST_NAME("VPSRAW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 15);
+            VREPLVE0xy(H, d0, d0);
+            VSRAxy(H, v0, v1, d0);
+            break;
+        case 0xE2:
+            INST_NAME("VPSRAD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 31);
+            VREPLVE0xy(W, d0, d0);
+            VSRAxy(W, v0, v1, d0);
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
             nextop = F8;
@@ -527,6 +581,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VXOR_Vxy(v0, v1, v2);
             break;
+        case 0xF1:
+            INST_NAME("VPSLLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VREPLVE0xy(H, d0, v2);
+            VSLLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF2:
+            INST_NAME("VPSLLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VREPLVE0xy(W, d0, v2);
+            VSLLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF3:
+            INST_NAME("VPSLLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSLLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xF7:
             INST_NAME("VMASKMOVDQU Gx, Ex");
             nextop = F8;
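
The lowering above follows one pattern: LoongArch vector shifts only use the count modulo the element width, so each handler broadcasts the 64-bit count, builds an all-ones/all-zeros mask from a "count <= width-1" compare (VSLEI/VSLE), performs the shift with the count re-broadcast at element width, and ANDs the result with the mask so out-of-range counts zero the destination (the arithmetic shifts VPSRAW/VPSRAD instead clamp the count with VMINI). A scalar sketch of the VPSRLW case, assuming the modulo-width shift behaviour described:

#include <stdint.h>

/* One 16-bit lane of the VPSRLW lowering used in case 0xD1 above. */
static inline uint16_t vpsrlw_lowered_lane(uint16_t x, uint64_t count64)
{
    uint16_t mask    = (count64 <= 15) ? 0xFFFF : 0x0000;  /* VSLEI.DU q0, q0, 15 */
    uint16_t shifted = (uint16_t)(x >> (count64 & 15));    /* VSRL.H uses count mod 16 */
    return shifted & mask;                                  /* VAND.V v0, v0, q0 */
}
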
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index e8f82715..1e561b67 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -307,6 +307,48 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 VSLLWIL_DU_WU(q0, q1, 0);
             }
             break;
+        case 0x45:
+            INST_NAME("VPSRLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSRLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSRLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
+        case 0x46:
+            INST_NAME("VPSRAVD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(WU, d0, v2, 31);
+            VSRAxy(W, v0, v1, d0);
+            break;
+        case 0x47:
+            INST_NAME("VPSLLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSLLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSLLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
         case 0x8C:
             INST_NAME("VPMASKMOVD/Q Gx, Vx, Ex");
             nextop = F8;
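
The 0F38 opcodes are the AVX2 variable shifts, where each lane carries its own count: VPSRLVD/VPSLLVD (and their rex.w Q forms) zero any lane whose count exceeds width-1, while VPSRAVD clamps the count so the sign bit fills the lane, which is why the handlers reuse the VSLEI/VSLE mask trick for the logical forms and VMINI for the arithmetic one. A per-lane C reference for the 32-bit forms (illustrative helpers, not part of the patch):

#include <stdint.h>

/* VPSRLVD lane: logical right shift, a count of 32 or more gives 0. */
static inline uint32_t psrlvd_lane(uint32_t x, uint32_t count)
{
    return (count > 31) ? 0 : (x >> count);
}

/* VPSRAVD lane: arithmetic right shift, count clamped to 31
   (assumes >> on a negative int is an arithmetic shift, as on common compilers). */
static inline int32_t psravd_lane(int32_t x, uint32_t count)
{
    if (count > 31) count = 31;
    return x >> count;
}
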
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 0472481b..692b8fc8 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1530,7 +1530,6 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFNMADD_D(vd, vj, vk, va)    EMIT(type_4R(0b000010011010, va, vk, vj, vd))
 #define VFNMSUB_D(vd, vj, vk, va)    EMIT(type_4R(0b000010011110, va, vk, vj, vd))
 
-
 #define XVADD_B(vd, vj, vk)          EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk)          EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk)          EMIT(type_3R(0b01110100000010110, vk, vj, vd))
@@ -2239,6 +2238,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVMINI_DU(xd, xj, imm5)      EMIT(type_2RI5(0b01110110100101111, imm5, xj, xd))
 #define XVFRSTPI_B(xd, xj, imm5)     EMIT(type_2RI5(0b01110110100110100, imm5, xj, xd))
 #define XVFRSTPI_H(xd, xj, imm5)     EMIT(type_2RI5(0b01110110100110101, imm5, xj, xd))
+#define XVLDI(xd, imm13)             EMIT(type_1RI13(0b01110111111000, imm13, xd))
 
 #define XVFMADD_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100001, xa, xk, xj, xd))
 #define XVFMSUB_S(xd, xj, xk, xa)  EMIT(type_4R(0b000010100101, xa, xk, xj, xd))
@@ -2702,4 +2702,76 @@ LSX instruction starts with V, LASX instruction starts with XV.
             VSRAI_##width(vd, vj, imm);  \
         }                                \
     } while (0)
+
+#define VSLLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLL_##width(vd, vj, vk); \
+        } else {                       \
+            VSLL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRL_##width(vd, vj, vk); \
+        } else {                       \
+            VSRL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRAxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRA_##width(vd, vj, vk); \
+        } else {                       \
+            VSRA_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSLEIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVSLEI_##width(vd, vj, imm); \
+        } else {                         \
+            VSLEI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
+
+#define VSLExy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLE_##width(vd, vj, vk); \
+        } else {                       \
+            VSLE_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VLDIxy(vd, imm)     \
+    do {                    \
+        if (vex.l) {        \
+            XVLDI(vd, imm); \
+        } else {            \
+            VLDI(vd, imm);  \
+        }                   \
+    } while (0)
+
+#define VREPLVE0xy(width, vd, vj)        \
+    do {                                 \
+        if (vex.l) {                     \
+            XVREPLVE0_##width(vd, vj);   \
+        } else {                         \
+            VREPLVEI_##width(vd, vj, 0); \
+        }                                \
+    } while (0)
+
+#define VMINIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVMINI_##width(vd, vj, imm); \
+        } else {                         \
+            VMINI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
 #endif //__ARM64_EMITTER_H__
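
A note on the new *xy helpers in la64_emitter.h: each one dispatches on vex.l, emitting the 256-bit LASX encoding (XV*) for VEX.256 and the 128-bit LSX encoding (V*) for VEX.128, so the opcode handlers above stay identical for both vector lengths. The only asymmetric case is VREPLVE0xy, which uses XVREPLVE0 on LASX but falls back to VREPLVEI with index 0 on LSX, since both broadcast element 0. As an illustration, VSRLxy(H, v0, v1, d0) inside a handler expands roughly to:

if (vex.l) {
    XVSRL_H(v0, v1, d0);  /* 256-bit LASX form */
} else {
    VSRL_H(v0, v1, d0);   /* 128-bit LSX form */
}
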