about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorphorcys <phorcys@126.com>2025-07-14 17:33:26 +0800
committerGitHub <noreply@github.com>2025-07-14 11:33:26 +0200
commit7a4583699b5e54f5e4efe23c7920cf2993e3e1f9 (patch)
treec9cdeeecda3c4f7f9944c7ec03bcaeae2d475a18 /src
parent26851574ad46438c8caca62717afab764395a465 (diff)
downloadbox64-7a4583699b5e54f5e4efe23c7920cf2993e3e1f9.tar.gz
box64-7a4583699b5e54f5e4efe23c7920cf2993e3e1f9.zip
[LA64_DYNAREC] Add la64 avx arith ops, part 1. (#2814)
Add 1:1 avx arith ops.
  *  VP{ADD,SUB}{B,W,D,Q,SB,SW,USB,USW}
  *  VPMUL{DQ,HW,HUW,LW,LD,UDQ}
  *  V{MAX,MIN}{UB,UW,UD,SB,SW,SD}
  *  VAVG{B,W}
  *  VSIGN{B,W,D}
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f.c162
-rw-r--r--src/dynarec/la64/dynarec_la64_avx_66_0f38.c84
-rw-r--r--src/dynarec/la64/la64_emitter.h117
3 files changed, 357 insertions, 6 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 68d69241..acc0ca9c 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -487,12 +487,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             q0 = fpu_get_scratch(dyn);
             d0 = fpu_get_scratch(dyn);
-            VREPLVE0xy(D, q0 ,v2);
+            VREPLVE0xy(D, q0, v2);
             VLDIxy(d0, (0b011 << 10) | 0x3f);
             VSLExy(DU, d0, q0, d0);
             VSRLxy(D, v0, v1, q0);
             VAND_Vxy(v0, v0, d0);
             break;
+        case 0xD4:
+            INST_NAME("VPADDQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VADDxy(D, v0, v1, v2);
+            break;
+        case 0xD5:
+            INST_NAME("VPMULLW Gx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMULxy(H, v0, v1, v2);
+            break;
         case 0xD6:
             INST_NAME("VMOVD Ex, Gx");
             nextop = F8;
@@ -523,18 +535,60 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 VPICKVE2GR_DU(gd, d1, 0);
             }
             break;
+        case 0xD8:
+            INST_NAME("VPSUBUSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSSUBxy(BU, v0, v1, v2);
+            break;
+        case 0xD9:
+            INST_NAME("VPSUBUSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSSUBxy(HU, v0, v1, v2);
+            break;
+        case 0xDA:
+            INST_NAME("VPMINUB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(BU, v0, v1, v2);
+            break;
         case 0xDB:
             INST_NAME("VPAND Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VAND_Vxy(v0, v1, v2);
             break;
+        case 0xDC:
+            INST_NAME("VPADDUSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSADDxy(BU, v0, v1, v2);
+            break;
+        case 0xDD:
+            INST_NAME("VPADDUSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSADDxy(HU, v0, v1, v2);
+            break;
+        case 0xDE:
+            INST_NAME("VPMAXUB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(BU, v0, v1, v2);
+            break;
         case 0xDF:
             INST_NAME("VPANDN Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VANDN_Vxy(v0, v1, v2);
             break;
+        case 0xE0:
+            INST_NAME("VPAVGB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VAVGRxy(BU, v0, v1, v2);
+            break;
         case 0xE1:
             INST_NAME("VPSRAW Gx, Vx, Ex");
             nextop = F8;
@@ -542,7 +596,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             d0 = fpu_get_scratch(dyn);
             VMINIxy(DU, d0, v2, 15);
             VREPLVE0xy(H, d0, d0);
-            VSRAxy(H, v0, v1, d0);            
+            VSRAxy(H, v0, v1, d0);
             break;
         case 0xE2:
             INST_NAME("VPSRAD Gx, Vx, Ex");
@@ -551,7 +605,25 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             d0 = fpu_get_scratch(dyn);
             VMINIxy(DU, d0, v2, 31);
             VREPLVE0xy(W, d0, d0);
-            VSRAxy(W, v0, v1, d0);            
+            VSRAxy(W, v0, v1, d0);
+            break;
+        case 0xE3:
+            INST_NAME("VPAVGW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VAVGRxy(HU, v0, v1, v2);
+            break;
+        case 0xE4:
+            INST_NAME("VPMULHUW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMUHxy(HU, v0, v1, v2);
+            break;
+        case 0xE5:
+            INST_NAME("VPMULHW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMUHxy(H, v0, v1, v2);
             break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
@@ -569,12 +641,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0xE8:
+            INST_NAME("VPSUBSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSSUBxy(B, v0, v1, v2);
+            break;
+        case 0xE9:
+            INST_NAME("VPSUBSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSSUBxy(H, v0, v1, v2);
+            break;
+        case 0xEA:
+            INST_NAME("VPMINSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(H, v0, v1, v2);
+            break;
         case 0xEB:
             INST_NAME("VPOR Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VOR_Vxy(v0, v1, v2);
             break;
+        case 0xEC:
+            INST_NAME("VPADDSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSADDxy(B, v0, v1, v2);
+            break;
+        case 0xED:
+            INST_NAME("VPADDSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSADDxy(H, v0, v1, v2);
+            break;
+        case 0xEE:
+            INST_NAME("VPMAXSW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(H, v0, v1, v2);
+            break;
         case 0xEF:
             INST_NAME("VPXOR Gx, Vx, Ex");
             nextop = F8;
@@ -617,6 +725,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             VSLLxy(D, v0, v1, q0);
             VAND_Vxy(v0, v0, d0);
             break;
+        case 0xF4:
+            INST_NAME("VPMULLUDQ Gx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMULWEVxy(D_WU, v0, v1, v2);
+            break;
         case 0xF7:
             INST_NAME("VMASKMOVDQU Gx, Ex");
             nextop = F8;
@@ -629,6 +743,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             VBITSEL_V(q0, q0, v0, q1); // sel v0 if mask is 1
             VST(q0, xRDI, 0);
             break;
+        case 0xF8:
+            INST_NAME("VPSUBB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSUBxy(B, v0, v1, v2);
+            break;
+        case 0xF9:
+            INST_NAME("VPSUBW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSUBxy(H, v0, v1, v2);
+            break;
+        case 0xFA:
+            INST_NAME("VPSUBD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSUBxy(W, v0, v1, v2);
+            break;
+        case 0xFB:
+            INST_NAME("VPSUBQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSUBxy(D, v0, v1, v2);
+            break;
+        case 0xFC:
+            INST_NAME("VPADDB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VADDxy(B, v0, v1, v2);
+            break;
+        case 0xFD:
+            INST_NAME("VPADDW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VADDxy(H, v0, v1, v2);
+            break;
+        case 0xFE:
+            INST_NAME("VPADDD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VADDxy(W, v0, v1, v2);
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 51696bc7..6e794734 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -57,6 +57,24 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
     rex_t rex = vex.rex;
 
     switch (opcode) {
+        case 0x08:
+            INST_NAME("VPSIGNB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSIGNCOVxy(B, v0, v2, v1);
+            break;
+        case 0x09:
+            INST_NAME("VPSIGNW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSIGNCOVxy(H, v0, v2, v1);
+            break;
+        case 0x0A:
+            INST_NAME("VPSIGND Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VSIGNCOVxy(W, v0, v2, v1);
+            break;
         case 0x18:
             INST_NAME("VBROADCASTSS Gx, Ex");
             nextop = F8;
@@ -91,7 +109,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             if (vex.l) {
                 GETEYx(q1, 0, 0);
                 GETGYy_empty(q0);
-                VEXT2XV_H_B(q0, q1);                
+                VEXT2XV_H_B(q0, q1);
             } else {
                 GETEYSD(q1, 0, 0);
                 GETGYx_empty(q0);
@@ -134,7 +152,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             if (vex.l) {
                 GETEYx(q1, 0, 0);
                 GETGYy_empty(q0);
-                VEXT2XV_W_H(q0, q1);                
+                VEXT2XV_W_H(q0, q1);
             } else {
                 GETEYSD(q1, 0, 0);
                 GETGYx_empty(q0);
@@ -158,13 +176,19 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             if (vex.l) {
                 GETEYx(q1, 0, 0);
                 GETGYy_empty(q0);
-                VEXT2XV_D_W(q0, q1);                
+                VEXT2XV_D_W(q0, q1);
             } else {
                 GETEYSD(q1, 0, 0);
                 GETGYx_empty(q0);
                 VSLLWIL_D_W(q0, q1, 0);
             }
             break;
+        case 0x28:
+            INST_NAME("VPMULDQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMULWEVxy(D_W, v0, v1, v2);
+            break;
         case 0x2C:
             INST_NAME("VMASKMOVPS Gx, Vx, Ex");
             nextop = F8;
@@ -307,6 +331,60 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 VSLLWIL_DU_WU(q0, q1, 0);
             }
             break;
+        case 0x38:
+            INST_NAME("VPMINSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(B, v0, v1, v2);
+            break;
+        case 0x39:
+            INST_NAME("VPMINSD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(W, v0, v1, v2);
+            break;
+        case 0x3A:
+            INST_NAME("VPMINUW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(HU, v0, v1, v2);
+            break;
+        case 0x3B:
+            INST_NAME("VPMINUD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMINxy(WU, v0, v1, v2);
+            break;
+        case 0x3C:
+            INST_NAME("VPMAXSB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(B, v0, v1, v2);
+            break;
+        case 0x3D:
+            INST_NAME("VPMAXSD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(W, v0, v1, v2);
+            break;
+        case 0x3E:
+            INST_NAME("VPMAXUW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(HU, v0, v1, v2);
+            break;
+        case 0x3F:
+            INST_NAME("VPMAXUD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMAXxy(WU, v0, v1, v2);
+            break;
+        case 0x40:
+            INST_NAME("VPMULLD Gx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            VMULxy(W, v0, v1, v2);
+            break;
         case 0x45:
             INST_NAME("VPSRLVD/Q Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 692b8fc8..ea78e328 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -2774,4 +2774,121 @@ LSX instruction starts with V, LASX instruction starts with XV.
             VMINI_##width(vd, vj, imm);  \
         }                                \
     } while (0)
+
+#define VADDxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVADD_##width(vd, vj, vk); \
+        } else {                       \
+            VADD_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSUBxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSUB_##width(vd, vj, vk); \
+        } else {                       \
+            VSUB_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSADDxy(width, vd, vj, vk)      \
+    do {                                \
+        if (vex.l) {                    \
+            XVSADD_##width(vd, vj, vk); \
+        } else {                        \
+            VSADD_##width(vd, vj, vk);  \
+        }                               \
+    } while (0)
+
+#define VSSUBxy(width, vd, vj, vk)      \
+    do {                                \
+        if (vex.l) {                    \
+            XVSSUB_##width(vd, vj, vk); \
+        } else {                        \
+            VSSUB_##width(vd, vj, vk);  \
+        }                               \
+    } while (0)
+
+#define VMULxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVMUL_##width(vd, vj, vk); \
+        } else {                       \
+            VMUL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VMUHxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVMUH_##width(vd, vj, vk); \
+        } else {                       \
+            VMUH_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VMULWEVxy(width, vd, vj, vk)      \
+    do {                                  \
+        if (vex.l) {                      \
+            XVMULWEV_##width(vd, vj, vk); \
+        } else {                          \
+            VMULWEV_##width(vd, vj, vk);  \
+        }                                 \
+    } while (0)
+
+#define VMULWODxy(width, vd, vj, vk)      \
+    do {                                  \
+        if (vex.l) {                      \
+            XVMULWOD_##width(vd, vj, vk); \
+        } else {                          \
+            VMULWOD_##width(vd, vj, vk);  \
+        }                                 \
+    } while (0)
+
+#define VMAXxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVMAX_##width(vd, vj, vk); \
+        } else {                       \
+            VMAX_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VMINxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVMIN_##width(vd, vj, vk); \
+        } else {                       \
+            VMIN_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSIGNCOVxy(width, vd, vj, vk)      \
+    do {                                   \
+        if (vex.l) {                       \
+            XVSIGNCOV_##width(vd, vj, vk); \
+        } else {                           \
+            VSIGNCOV_##width(vd, vj, vk);  \
+        }                                  \
+    } while (0)
+
+#define VAVGxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVAVG_##width(vd, vj, vk); \
+        } else {                       \
+            VAVG_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VAVGRxy(width, vd, vj, vk)      \
+    do {                                \
+        if (vex.l) {                    \
+            XVAVGR_##width(vd, vj, vk); \
+        } else {                        \
+            VAVGR_##width(vd, vj, vk);  \
+        }                               \
+    } while (0)
 #endif //__ARM64_EMITTER_H__