| author | phorcys <phorcys@126.com> | 2025-07-14 14:54:16 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-14 08:54:16 +0200 |
| commit | ce08e8e27f7fda2ff2c02af215f3b8e16d3f0576 (patch) | |
| tree | a82ec9f1537c149ba6db3bf5a07d40d71dfc6936 | |
| parent | 6960cfa56fd786bc02c509cf62bcbc815fc672b1 (diff) | |
[LA64_DYNAREC] Add la64 avx shift ops. (#2806)
* VEX.66.0f VPSRLW/VPSRLD/VPSRLQ/VPSRAW/VPSRAD/VPSLLW/VPSLLD/VPSLLQ
* VEX.66.0f.38 VPSRLVD/VPSRLVQ/VPSRAVD/VPSLLVD/VPSLLVQ
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 90 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 42 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 74 |
3 files changed, 205 insertions, 1 deletions
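A note on the masking in the diff below: for the VEX.66.0f forms, x86 takes a single shift count from the low 64 bits of Ex and zeroes the destination when that count exceeds the element width minus one (logical shifts), while the arithmetic shifts behave as if the count were clamped. The LA64 vector shifts only consume the low bits of the per-lane count, so the emitted code either builds an all-ones/all-zeros mask (VSLEIxy/VSLExy followed by VAND_Vxy) or clamps the count with VMINIxy. A minimal scalar sketch of the intended x86 behavior for the 16-bit cases, in plain C; the helper names are illustrative only and not part of the patch or of box64:

```c
#include <stdint.h>

/* Reference semantics for PSRLW/VPSRLW: one count for all lanes,
   taken from the low 64 bits of the shift operand. */
static void psrlw_ref(uint16_t dst[8], const uint16_t src[8], uint64_t count)
{
    for (int i = 0; i < 8; i++)
        dst[i] = (count > 15) ? 0 : (uint16_t)(src[i] >> count);
}

/* Reference semantics for PSRAW/VPSRAW: out-of-range counts saturate,
   because an arithmetic shift by 15 already fills a 16-bit lane with
   its sign bit. Assumes the compiler implements >> on negative values
   as an arithmetic shift (true for common compilers). */
static void psraw_ref(int16_t dst[8], const int16_t src[8], uint64_t count)
{
    uint64_t c = (count > 15) ? 15 : count; /* mirrors VMINIxy(DU, d0, v2, 15) */
    for (int i = 0; i < 8; i++)
        dst[i] = (int16_t)(src[i] >> c);
}
```

The VAND_Vxy against the (count <= 15) mask in the VPSRLW/VPSLLW paths plays the role of the `count > 15 ? 0 : ...` branch above; the D/Q variants do the same with limits of 31 and 63.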
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index 0c707a28..26b6f684 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -457,6 +457,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 SMWRITE2();
             }
             break;
+        case 0xD1:
+            INST_NAME("VPSRLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(H, d0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VSRLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD2:
+            INST_NAME("VPSRLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VREPLVE0xy(W, d0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VSRLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xD3:
+            INST_NAME("VPSRLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0 ,v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSRLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xD6:
             INST_NAME("VMOVD Ex, Gx");
             nextop = F8;
@@ -499,6 +535,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VANDN_Vxy(v0, v1, v2);
             break;
+        case 0xE1:
+            INST_NAME("VPSRAW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 15);
+            VREPLVE0xy(H, d0, d0);
+            VSRAxy(H, v0, v1, d0);
+            break;
+        case 0xE2:
+            INST_NAME("VPSRAD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(DU, d0, v2, 31);
+            VREPLVE0xy(W, d0, d0);
+            VSRAxy(W, v0, v1, d0);
+            break;
         case 0xE7:
             INST_NAME("VMOVNTDQ Ex, Gx");
             nextop = F8;
@@ -527,6 +581,42 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             VXOR_Vxy(v0, v1, v2);
             break;
+        case 0xF1:
+            INST_NAME("VPSLLW Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 15);
+            VREPLVE0xy(H, d0, v2);
+            VSLLxy(H, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF2:
+            INST_NAME("VPSLLD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VSLEIxy(DU, q0, q0, 31);
+            VREPLVE0xy(W, d0, v2);
+            VSLLxy(W, v0, v1, d0);
+            VAND_Vxy(v0, v0, q0);
+            break;
+        case 0xF3:
+            INST_NAME("VPSLLQ Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            q0 = fpu_get_scratch(dyn);
+            d0 = fpu_get_scratch(dyn);
+            VREPLVE0xy(D, q0, v2);
+            VLDIxy(d0, (0b011 << 10) | 0x3f);
+            VSLExy(DU, d0, q0, d0);
+            VSLLxy(D, v0, v1, q0);
+            VAND_Vxy(v0, v0, d0);
+            break;
         case 0xF7:
             INST_NAME("VMASKMOVDQU Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index e8f82715..1e561b67 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -307,6 +307,48 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 VSLLWIL_DU_WU(q0, q1, 0);
             }
             break;
+        case 0x45:
+            INST_NAME("VPSRLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSRLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSRLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
+        case 0x46:
+            INST_NAME("VPSRAVD Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d0 = fpu_get_scratch(dyn);
+            VMINIxy(WU, d0, v2, 31);
+            VSRAxy(W, v0, v1, d0);
+            break;
+        case 0x47:
+            INST_NAME("VPSLLVD/Q Gx, Vx, Ex");
+            nextop = F8;
+            GETGY_empty_VYEY_xy(v0, v1, v2, 0);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                d0 = fpu_get_scratch(dyn);
+                VLDIxy(d0, (0b011 << 10) | 63);
+                VSLExy(DU, d1, v2, d0);
+                VSLLxy(D, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            } else {
+                VSLEIxy(WU, d1, v2, 31);
+                VSLLxy(W, v0, v1, v2);
+                VAND_Vxy(v0, v0, d1);
+            }
+            break;
         case 0x8C:
             INST_NAME("VPMASKMOVD/Q Gx, Vx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 0472481b..692b8fc8 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1530,7 +1530,6 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFNMADD_D(vd, vj, vk, va) EMIT(type_4R(0b000010011010, va, vk, vj, vd))
 #define VFNMSUB_D(vd, vj, vk, va) EMIT(type_4R(0b000010011110, va, vk, vj, vd))
 
-
 #define XVADD_B(vd, vj, vk) EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk) EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk) EMIT(type_3R(0b01110100000010110, vk, vj, vd))
@@ -2239,6 +2238,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVMINI_DU(xd, xj, imm5) EMIT(type_2RI5(0b01110110100101111, imm5, xj, xd))
 #define XVFRSTPI_B(xd, xj, imm5) EMIT(type_2RI5(0b01110110100110100, imm5, xj, xd))
 #define XVFRSTPI_H(xd, xj, imm5) EMIT(type_2RI5(0b01110110100110101, imm5, xj, xd))
+#define XVLDI(xd, imm13) EMIT(type_1RI13(0b01110111111000, imm13, xd))
 
 #define XVFMADD_S(xd, xj, xk, xa) EMIT(type_4R(0b000010100001, xa, xk, xj, xd))
 #define XVFMSUB_S(xd, xj, xk, xa) EMIT(type_4R(0b000010100101, xa, xk, xj, xd))
@@ -2702,4 +2702,76 @@ LSX instruction starts with V, LASX instruction starts with XV.
             VSRAI_##width(vd, vj, imm); \
         }                               \
     } while (0)
+
+#define VSLLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLL_##width(vd, vj, vk); \
+        } else {                       \
+            VSLL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRLxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRL_##width(vd, vj, vk); \
+        } else {                       \
+            VSRL_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSRAxy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSRA_##width(vd, vj, vk); \
+        } else {                       \
+            VSRA_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VSLEIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVSLEI_##width(vd, vj, imm); \
+        } else {                         \
+            VSLEI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
+
+#define VSLExy(width, vd, vj, vk)      \
+    do {                               \
+        if (vex.l) {                   \
+            XVSLE_##width(vd, vj, vk); \
+        } else {                       \
+            VSLE_##width(vd, vj, vk);  \
+        }                              \
+    } while (0)
+
+#define VLDIxy(vd, imm)     \
+    do {                    \
+        if (vex.l) {        \
+            XVLDI(vd, imm); \
+        } else {            \
+            VLDI(vd, imm);  \
+        }                   \
+    } while (0)
+
+#define VREPLVE0xy(width, vd, vj)        \
+    do {                                 \
+        if (vex.l) {                     \
+            XVREPLVE0_##width(vd, vj);   \
+        } else {                         \
+            VREPLVEI_##width(vd, vj, 0); \
+        }                                \
+    } while (0)
+
+#define VMINIxy(width, vd, vj, imm)      \
+    do {                                 \
+        if (vex.l) {                     \
+            XVMINI_##width(vd, vj, imm); \
+        } else {                         \
+            VMINI_##width(vd, vj, imm);  \
+        }                                \
+    } while (0)
 #endif //__ARM64_EMITTER_H__
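The VEX.66.0f.38 forms (opcodes 0x45/0x46/0x47) handled in dynarec_la64_avx_66_0f38.c are per-element: each lane is shifted by the count held in the matching lane of Ex, with out-of-range counts zeroing the lane for the logical variants and saturating for VPSRAVD. A rough scalar sketch of that behavior for the 32-bit variants, again with illustrative helper names that are not part of the patch:

```c
#include <stdint.h>

/* Reference semantics for VPSRLVD: per-lane counts, lanes with a count
   above 31 become zero. */
static void vpsrlvd_ref(uint32_t dst[4], const uint32_t src[4], const uint32_t cnt[4])
{
    for (int i = 0; i < 4; i++)
        dst[i] = (cnt[i] > 31) ? 0 : (src[i] >> cnt[i]);
}

/* Reference semantics for VPSRAVD: per-lane counts saturate at 31.
   Assumes >> on a negative int is an arithmetic shift (true for
   common compilers). */
static void vpsravd_ref(int32_t dst[4], const int32_t src[4], const uint32_t cnt[4])
{
    for (int i = 0; i < 4; i++) {
        uint32_t c = (cnt[i] > 31) ? 31 : cnt[i]; /* mirrors VMINIxy(WU, d0, v2, 31) */
        dst[i] = src[i] >> c;
    }
}
```

For the 64-bit variants the in-range mask cannot come from VSLEIxy, whose immediate field is only 5 bits (maximum 31), which is presumably why the rex.w paths load 63 into a scratch register with VLDIxy and compare with VSLExy instead.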