about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author phorcys <phorcys@126.com> 2025-07-17 14:10:07 +0800
committer GitHub <noreply@github.com> 2025-07-17 08:10:07 +0200
commit a960e983945e2ce72b3648fde042f70e0fcf8c48 (patch)
tree c134e8dec8ab5fb5d805a62ceadc06b17cdbb9b0 /src
parent ef2f960d4ba7557b2baa26cd65b012b9a3363cbd (diff)
download box64-a960e983945e2ce72b3648fde042f70e0fcf8c48.tar.gz
box64-a960e983945e2ce72b3648fde042f70e0fcf8c48.zip
[LA64_DYNAREC] Add la64 avx pack/unpack ops , part 1. (#2818)
VPACKSSWB
VPACKSSDW
VPACKUSWB
VPACKUSDW
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f.c 49
-rw-r--r-- src/dynarec/la64/dynarec_la64_avx_66_0f38.c 20
-rw-r--r-- src/dynarec/la64/la64_emitter.h 20
3 files changed, 88 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 27d4cab9..33519001 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -251,6 +251,55 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VXOR_Vxy(v0, v1, v2);
             break;
+        case 0x63: // pack words to bytes with signed saturation
+            INST_NAME("VPACKSSWB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            if (v1 != v2) {
+                VSATxy(H, q0, v2, 7);     // saturate v2 into the scratch first, before v0 is written
+                VSATxy(H, v0, v1, 7);     // then saturate v1 straight into the destination
+                VPICKEVxy(B, v0, q0, v0); // keep the even bytes of both saturated vectors
+            } else {
+                VSATxy(H, v0, v1, 7);     // identical sources: one saturation is enough
+                VPICKEVxy(B, v0, v0, v0); // same saturated vector feeds both operands
+            }
+            break;
+        case 0x67: // pack signed words to bytes with unsigned saturation: clamp to [0, 0xff], keep even bytes
+            INST_NAME("VPACKUSWB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VLDIxy(q0, 0b0010011111111); // broadcast 0xff as 16-bit elements to all lanes
+            if (v1 == v2) {
+                VMAXIxy(H, d0, v1, 0);     // lower clamp: negative words become 0
+                VMINxy(H, d0, d0, q0);     // upper clamp must read d0 (not v1) or the lower clamp is lost
+                VPICKEVxy(B, v0, d0, d0);  // same clamped vector feeds both operands
+            } else {
+                VMAXIxy(H, d0, v1, 0);     // lower clamp of v1
+                VMAXIxy(H, q1, v2, 0);     // lower clamp of v2
+                VMINxy(H, d0, d0, q0);     // upper clamp of v1 against 0xff
+                VMINxy(H, q1, q1, q0);     // upper clamp of v2 against 0xff
+                VPICKEVxy(B, v0, q1, d0);  // keep the even bytes of both clamped vectors
+            }
+            break;
+        case 0x6B: // pack dwords to words with signed saturation
+            INST_NAME("VPACKSSDW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            if (v1 != v2) {
+                VSATxy(W, d0, v1, 15);    // saturated copy of v1 in scratch d0
+                VSATxy(W, q0, v2, 15);    // saturated copy of v2 in scratch q0
+                VPICKEVxy(H, v0, q0, d0); // keep the even halfwords of both saturated vectors
+            } else {
+                VSATxy(W, d0, v1, 15);    // identical sources: one saturation is enough
+                VPICKEVxy(H, v0, d0, d0); // same saturated vector feeds both operands
+            }
+            break;
         case 0x6E:
             INST_NAME("VMOVD Gx, Ed");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index c411dc48..2109223f 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -309,6 +309,26 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMULWEVxy(D_W, v0, v1, v2);
             break;
+        case 0x2B: // pack signed dwords to words with unsigned saturation: clamp to [0, 0xffff], keep even halfwords
+            INST_NAME("VPACKUSDW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn); // reserved but not referenced in this case
+            VLDIxy(q0, (0b10111 << 8) | 0x00); // broadcast 0x0000ffff as 32-bit elements to all lanes
+            if (v1 == v2) {
+                VMAXIxy(W, v0, v1, 0);     // lower clamp: negative dwords become 0
+                VMINxy(W, v0, v0, q0);     // upper clamp must read v0 (not v1) or the lower clamp is lost
+                VPICKEVxy(H, v0, v0, v0);  // same clamped vector feeds both operands
+            } else {
+                VMAXIxy(W, q1, v2, 0);     // clamp v2 into scratch first, before v0 is written
+                VMAXIxy(W, v0, v1, 0);     // lower clamp of v1 into the destination
+                VMINxy(W, q1, q1, q0);     // upper clamp of v2 against 0xffff
+                VMINxy(W, v0, v0, q0);     // upper clamp of v1 against 0xffff
+                VPICKEVxy(H, v0, q1, v0);  // keep the even halfwords of both clamped vectors
+            }
+            break;
         case 0x2C:
             INST_NAME("VMASKMOVPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 40aa62d0..f6a4d8ad 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -2763,7 +2763,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                   \
     } while (0)
 
-#define VREPLVE0xy(width, vd, vj)          \
+#define VREPLVE0xy(width, vd, vj)        \
     do {                                 \
         if (vex.l) {                     \
             XVREPLVE0_##width(vd, vj);   \
@@ -2772,6 +2772,15 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                                \
     } while (0)
 
+#define VMAXIxy(width, vd, vj, imm)      \
+    do { /* vex.l set: XV (LASX) form; clear: V (LSX) form */ \
+        if (!vex.l) {                    \
+            VMAXI_##width(vd, vj, imm);  \
+        } else {                         \
+            XVMAXI_##width(vd, vj, imm); \
+        }                                \
+    } while (0)
+
 #define VMINIxy(width, vd, vj, imm)      \
     do {                                 \
         if (vex.l) {                     \
@@ -2979,4 +2988,13 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                               \
     } while (0)
 
+#define VSATxy(width, vd, vj, imm)      \
+    do { /* vex.l set: XV (LASX) form; clear: V (LSX) form */ \
+        if (!vex.l) {                   \
+            VSAT_##width(vd, vj, imm);  \
+        } else {                        \
+            XVSAT_##width(vd, vj, imm); \
+        }                               \
+    } while (0)
+
 #endif //__ARM64_EMITTER_H__