diff options
| author | phorcys <phorcys@126.com> | 2025-07-17 14:10:07 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-17 08:10:07 +0200 |
| commit | a960e983945e2ce72b3648fde042f70e0fcf8c48 (patch) | |
| tree | c134e8dec8ab5fb5d805a62ceadc06b17cdbb9b0 /src | |
| parent | ef2f960d4ba7557b2baa26cd65b012b9a3363cbd (diff) | |
| download | box64-a960e983945e2ce72b3648fde042f70e0fcf8c48.tar.gz box64-a960e983945e2ce72b3648fde042f70e0fcf8c48.zip | |
[LA64_DYNAREC] Add la64 avx pack/unpack ops, part 1. (#2818)
VPACKSSWB VPACKSSDW VPACKUSWB VPACKUSDW
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 49 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 20 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 20 |
3 files changed, 88 insertions, 1 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 27d4cab9..33519001 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -251,6 +251,55 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VXOR_Vxy(v0, v1, v2);
             break;
+        case 0x63:
+            INST_NAME("VPACKSSWB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            if (v1 == v2) {
+                VSATxy(H, v0, v1, 7); // signed-saturate each 16-bit element to the int8 range
+                VPICKEVxy(B, v0, v0, v0); // keep the even (low) byte of every element
+            } else {
+                VSATxy(H, q0, v2, 7); // saturate Ex first: v0 may alias a source
+                VSATxy(H, v0, v1, 7);
+                VPICKEVxy(B, v0, q0, v0); // low half from Vx, high half from Ex
+            }
+            break;
+        case 0x67:
+            INST_NAME("VPACKUSWB Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VLDIxy(q0, 0b0010011111111); // broadcast 0xff as 16-bit elements to all lanes
+            if (v1 == v2) {
+                VMAXIxy(H, d0, v1, 0); // clamp negatives to 0
+                VMINxy(H, d0, d0, q0); // clamp the already non-negative value to 0xff (must read d0, not v1)
+                VPICKEVxy(B, v0, d0, d0);
+            } else {
+                VMAXIxy(H, d0, v1, 0);
+                VMAXIxy(H, q1, v2, 0);
+                VMINxy(H, d0, d0, q0);
+                VMINxy(H, q1, q1, q0);
+                VPICKEVxy(B, v0, q1, d0); // low half from Vx, high half from Ex
+            }
+            break;
+        case 0x6B:
+            INST_NAME("VPACKSSDW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            if (v1 == v2) {
+                VSATxy(W, d0, v1, 15); // signed-saturate each 32-bit element to the int16 range
+                VPICKEVxy(H, v0, d0, d0);
+            } else {
+                VSATxy(W, d0, v1, 15);
+                VSATxy(W, q0, v2, 15);
+                VPICKEVxy(H, v0, q0, d0); // low half from Vx, high half from Ex
+            }
+            break;
         case 0x6E:
             INST_NAME("VMOVD Gx, Ed");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index c411dc48..2109223f 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -309,6 +309,26 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VMULWEVxy(D_W, v0, v1, v2);
             break;
+        case 0x2B:
+            INST_NAME("VPACKUSDW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            q1 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VLDIxy(q0, 0b1011100000000); // broadcast 0x0000ffff as 32-bit elements to all lanes
+            if (v1 == v2) {
+                VMAXIxy(W, v0, v1, 0); // clamp negatives to 0
+                VMINxy(W, v0, v0, q0); // clamp the already non-negative value to 0xffff (must read v0, not v1)
+                VPICKEVxy(H, v0, v0, v0);
+            } else {
+                VMAXIxy(W, q1, v2, 0); // handle Ex first: v0 may alias a source
+                VMAXIxy(W, v0, v1, 0);
+                VMINxy(W, q1, q1, q0);
+                VMINxy(W, v0, v0, q0);
+                VPICKEVxy(H, v0, q1, v0); // low half from Vx, high half from Ex
+            }
+            break;
         case 0x2C:
             INST_NAME("VMASKMOVPS Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 40aa62d0..f6a4d8ad 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -2763,7 +2763,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                                       \
     } while (0)
 
-#define VREPLVE0xy(width, vd, vj)                \
+#define VREPLVE0xy(width, vd, vj)               \
     do {                                        \
         if (vex.l) {                            \
             XVREPLVE0_##width(vd, vj);          \
@@ -2772,6 +2772,15 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                                       \
     } while (0)
 
+#define VMAXIxy(width, vd, vj, imm)             \
+    do {                                        \
+        if (vex.l) {                            \
+            XVMAXI_##width(vd, vj, imm);        \
+        } else {                                \
+            VMAXI_##width(vd, vj, imm);         \
+        }                                       \
+    } while (0)
+
 #define VMINIxy(width, vd, vj, imm)             \
     do {                                        \
         if (vex.l) {                            \
@@ -2979,4 +2988,13 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                                       \
     } while (0)
 
+#define VSATxy(width, vd, vj, imm)              \
+    do {                                        \
+        if (vex.l) {                            \
+            XVSAT_##width(vd, vj, imm);         \
+        } else {                                \
+            VSAT_##width(vd, vj, imm);          \
+        }                                       \
+    } while (0)
+
 #endif //__ARM64_EMITTER_H__