| | | |
|---|---|---|
| author | phorcys <phorcys@126.com> | 2025-07-14 17:33:26 +0800 |
| committer | GitHub <noreply@github.com> | 2025-07-14 11:33:26 +0200 |
| commit | 7a4583699b5e54f5e4efe23c7920cf2993e3e1f9 (patch) | |
| tree | c9cdeeecda3c4f7f9944c7ec03bcaeae2d475a18 /src | |
| parent | 26851574ad46438c8caca62717afab764395a465 (diff) | |
| download | box64-7a4583699b5e54f5e4efe23c7920cf2993e3e1f9.tar.gz, box64-7a4583699b5e54f5e4efe23c7920cf2993e3e1f9.zip | |
[LA64_DYNAREC] Add LA64 AVX arith ops, part 1. (#2814)
Add 1:1 AVX arith ops:
* VP{ADD,SUB}{B,W,D,Q,SB,SW,USB,USW}
* VPMUL{DQ,HW,HUW,LW,LD,LUDQ}
* V{MAX,MIN}{UB,UW,UD,SB,SW,SD}
* VAVG{B,W}
* VSIGN{B,W,D}
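Each mapping is 1:1 in the sense that one x86 packed-integer op lowers to a single LSX/LASX instruction with the same element-wise behaviour; VPADDUSB, for example, is emitted as VSADD.BU, and both perform an unsigned saturating byte add. A minimal scalar reference model of that shared element semantic (the helper name is illustrative, not from the patch):

```c
#include <stdint.h>
#include <stdio.h>

/* Per-element semantic shared by x86 PADDUSB and LoongArch VSADD.BU:
 * unsigned byte add that clamps to 255 instead of wrapping. */
static uint8_t addusb_elem(uint8_t a, uint8_t b)
{
    unsigned sum = (unsigned)a + (unsigned)b;
    return (uint8_t)(sum > 255 ? 255 : sum);
}

int main(void)
{
    printf("%u\n", addusb_elem(200, 100)); /* 255, saturated */
    printf("%u\n", addusb_elem(10, 20));   /* 30 */
    return 0;
}
```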
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 162 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 84 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 117 |

3 files changed, 357 insertions, 6 deletions
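Almost every new case in the patch below collapses to one of the *xy helper macros added to la64_emitter.h, which pick the 128-bit LSX opcode or the 256-bit LASX opcode based on VEX.L. The following standalone sketch shows only that dispatch shape; the emitter "macros" here are hypothetical printf stubs, and the vex struct is a stand-in for box64's decoded VEX prefix:

```c
#include <stdio.h>

/* Hypothetical stand-ins for the real LSX/LASX emitter macros. */
#define VADD_D(vd, vj, vk)  printf("vadd.d  vr%d, vr%d, vr%d\n", vd, vj, vk)
#define XVADD_D(vd, vj, vk) printf("xvadd.d xr%d, xr%d, xr%d\n", vd, vj, vk)

/* Same shape as the VADDxy macro added in la64_emitter.h:
 * VEX.L=1 (256-bit ymm) -> LASX, VEX.L=0 (128-bit xmm) -> LSX. */
#define VADDxy(width, vd, vj, vk)          \
    do {                                   \
        if (vex.l)                         \
            XVADD_##width(vd, vj, vk);     \
        else                               \
            VADD_##width(vd, vj, vk);      \
    } while (0)

struct { int l; } vex; /* minimal stand-in for the decoded VEX prefix */

int main(void)
{
    vex.l = 0;          /* VPADDQ xmm0, xmm1, xmm2 */
    VADDxy(D, 0, 1, 2); /* emits vadd.d  */
    vex.l = 1;          /* VPADDQ ymm0, ymm1, ymm2 */
    VADDxy(D, 0, 1, 2); /* emits xvadd.d */
    return 0;
}
```

Keeping the width split inside the macros is what lets each AVX arithmetic op stay a short, uniform case in the opcode switch.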
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index 68d69241..acc0ca9c 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -487,12 +487,24 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, GETGY_empty_VYEY_xy(v0, v1, v2, 0); q0 = fpu_get_scratch(dyn); d0 = fpu_get_scratch(dyn); - VREPLVE0xy(D, q0 ,v2); + VREPLVE0xy(D, q0, v2); VLDIxy(d0, (0b011 << 10) | 0x3f); VSLExy(DU, d0, q0, d0); VSRLxy(D, v0, v1, q0); VAND_Vxy(v0, v0, d0); break; + case 0xD4: + INST_NAME("VPADDQ Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VADDxy(D, v0, v1, v2); + break; + case 0xD5: + INST_NAME("VPMULLW Gx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMULxy(H, v0, v1, v2); + break; case 0xD6: INST_NAME("VMOVD Ex, Gx"); nextop = F8; @@ -523,18 +535,60 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VPICKVE2GR_DU(gd, d1, 0); } break; + case 0xD8: + INST_NAME("VPSUBUSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSSUBxy(BU, v0, v1, v2); + break; + case 0xD9: + INST_NAME("VPSUBUSW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSSUBxy(HU, v0, v1, v2); + break; + case 0xDA: + INST_NAME("VPMINUB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(BU, v0, v1, v2); + break; case 0xDB: INST_NAME("VPAND Gx, Vx, Ex"); nextop = F8; GETGY_empty_VYEY_xy(v0, v1, v2, 0); VAND_Vxy(v0, v1, v2); break; + case 0xDC: + INST_NAME("VPADDUSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSADDxy(BU, v0, v1, v2); + break; + case 0xDD: + INST_NAME("VPADDUSW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSADDxy(HU, v0, v1, v2); + break; + case 0xDE: + INST_NAME("VPMAXUB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(BU, v0, v1, v2); + break; case 0xDF: INST_NAME("VPANDN Gx, Vx, Ex"); nextop = F8; GETGY_empty_VYEY_xy(v0, v1, v2, 0); VANDN_Vxy(v0, v1, v2); break; + case 0xE0: + INST_NAME("VPAVGB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VAVGRxy(BU, v0, v1, v2); + break; case 0xE1: INST_NAME("VPSRAW Gx, Vx, Ex"); nextop = F8; @@ -542,7 +596,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, d0 = fpu_get_scratch(dyn); VMINIxy(DU, d0, v2, 15); VREPLVE0xy(H, d0, d0); - VSRAxy(H, v0, v1, d0); + VSRAxy(H, v0, v1, d0); break; case 0xE2: INST_NAME("VPSRAD Gx, Vx, Ex"); @@ -551,7 +605,25 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, d0 = fpu_get_scratch(dyn); VMINIxy(DU, d0, v2, 31); VREPLVE0xy(W, d0, d0); - VSRAxy(W, v0, v1, d0); + VSRAxy(W, v0, v1, d0); + break; + case 0xE3: + INST_NAME("VPAVGW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VAVGRxy(HU, v0, v1, v2); + break; + case 0xE4: + INST_NAME("VPMULHUW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMUHxy(HU, v0, v1, v2); + break; + case 0xE5: + INST_NAME("VPMULHW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMUHxy(H, v0, v1, v2); break; case 0xE7: INST_NAME("VMOVNTDQ Ex, Gx"); @@ -569,12 +641,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, SMWRITE2(); } break; + case 0xE8: + INST_NAME("VPSUBSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSSUBxy(B, v0, v1, v2); + break; + case 0xE9: + INST_NAME("VPSUBSW Gx, Vx, 
Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSSUBxy(H, v0, v1, v2); + break; + case 0xEA: + INST_NAME("VPMINSW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(H, v0, v1, v2); + break; case 0xEB: INST_NAME("VPOR Gx, Vx, Ex"); nextop = F8; GETGY_empty_VYEY_xy(v0, v1, v2, 0); VOR_Vxy(v0, v1, v2); break; + case 0xEC: + INST_NAME("VPADDSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSADDxy(B, v0, v1, v2); + break; + case 0xED: + INST_NAME("VPADDSW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSADDxy(H, v0, v1, v2); + break; + case 0xEE: + INST_NAME("VPMAXSW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(H, v0, v1, v2); + break; case 0xEF: INST_NAME("VPXOR Gx, Vx, Ex"); nextop = F8; @@ -617,6 +725,12 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VSLLxy(D, v0, v1, q0); VAND_Vxy(v0, v0, d0); break; + case 0xF4: + INST_NAME("VPMULLUDQ Gx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMULWEVxy(D_WU, v0, v1, v2); + break; case 0xF7: INST_NAME("VMASKMOVDQU Gx, Ex"); nextop = F8; @@ -629,6 +743,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VBITSEL_V(q0, q0, v0, q1); // sel v0 if mask is 1 VST(q0, xRDI, 0); break; + case 0xF8: + INST_NAME("VPSUBB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSUBxy(B, v0, v1, v2); + break; + case 0xF9: + INST_NAME("VPSUBW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSUBxy(H, v0, v1, v2); + break; + case 0xFA: + INST_NAME("VPSUBD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSUBxy(W, v0, v1, v2); + break; + case 0xFB: + INST_NAME("VPSUBQ Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSUBxy(D, v0, v1, v2); + break; + case 0xFC: + INST_NAME("VPADDB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VADDxy(B, v0, v1, v2); + break; + case 0xFD: + INST_NAME("VPADDW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VADDxy(H, v0, v1, v2); + break; + case 0xFE: + INST_NAME("VPADDD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VADDxy(W, v0, v1, v2); + break; default: DEFAULT; } diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c index 51696bc7..6e794734 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c @@ -57,6 +57,24 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i rex_t rex = vex.rex; switch (opcode) { + case 0x08: + INST_NAME("VPSIGNB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSIGNCOVxy(B, v0, v2, v1); + break; + case 0x09: + INST_NAME("VPSIGNW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSIGNCOVxy(H, v0, v2, v1); + break; + case 0x0A: + INST_NAME("VPSIGND Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VSIGNCOVxy(W, v0, v2, v1); + break; case 0x18: INST_NAME("VBROADCASTSS Gx, Ex"); nextop = F8; @@ -91,7 +109,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i if (vex.l) { GETEYx(q1, 0, 0); GETGYy_empty(q0); - VEXT2XV_H_B(q0, q1); + VEXT2XV_H_B(q0, q1); } else { GETEYSD(q1, 0, 0); GETGYx_empty(q0); @@ -134,7 +152,7 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i if (vex.l) { GETEYx(q1, 0, 0); GETGYy_empty(q0); - VEXT2XV_W_H(q0, 
q1); + VEXT2XV_W_H(q0, q1); } else { GETEYSD(q1, 0, 0); GETGYx_empty(q0); @@ -158,13 +176,19 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i if (vex.l) { GETEYx(q1, 0, 0); GETGYy_empty(q0); - VEXT2XV_D_W(q0, q1); + VEXT2XV_D_W(q0, q1); } else { GETEYSD(q1, 0, 0); GETGYx_empty(q0); VSLLWIL_D_W(q0, q1, 0); } break; + case 0x28: + INST_NAME("VPMULDQ Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMULWEVxy(D_W, v0, v1, v2); + break; case 0x2C: INST_NAME("VMASKMOVPS Gx, Vx, Ex"); nextop = F8; @@ -307,6 +331,60 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i VSLLWIL_DU_WU(q0, q1, 0); } break; + case 0x38: + INST_NAME("VPMINSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(B, v0, v1, v2); + break; + case 0x39: + INST_NAME("VPMINSD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(W, v0, v1, v2); + break; + case 0x3A: + INST_NAME("VPMINUW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(HU, v0, v1, v2); + break; + case 0x3B: + INST_NAME("VPMINUD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMINxy(WU, v0, v1, v2); + break; + case 0x3C: + INST_NAME("VPMAXSB Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(B, v0, v1, v2); + break; + case 0x3D: + INST_NAME("VPMAXSD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(W, v0, v1, v2); + break; + case 0x3E: + INST_NAME("VPMAXUW Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(HU, v0, v1, v2); + break; + case 0x3F: + INST_NAME("VPMAXUD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMAXxy(WU, v0, v1, v2); + break; + case 0x40: + INST_NAME("VPMULLD Gx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + VMULxy(W, v0, v1, v2); + break; case 0x45: INST_NAME("VPSRLVD/Q Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 692b8fc8..ea78e328 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -2774,4 +2774,121 @@ LSX instruction starts with V, LASX instruction starts with XV. 
VMINI_##width(vd, vj, imm); \ } \ } while (0) + +#define VADDxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVADD_##width(vd, vj, vk); \ + } else { \ + VADD_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VSUBxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSUB_##width(vd, vj, vk); \ + } else { \ + VSUB_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VSADDxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSADD_##width(vd, vj, vk); \ + } else { \ + VSADD_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VSSUBxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSSUB_##width(vd, vj, vk); \ + } else { \ + VSSUB_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMULxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMUL_##width(vd, vj, vk); \ + } else { \ + VMUL_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMUHxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMUH_##width(vd, vj, vk); \ + } else { \ + VMUH_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMULWEVxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMULWEV_##width(vd, vj, vk); \ + } else { \ + VMULWEV_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMULWODxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMULWOD_##width(vd, vj, vk); \ + } else { \ + VMULWOD_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMAXxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMAX_##width(vd, vj, vk); \ + } else { \ + VMAX_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VMINxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVMIN_##width(vd, vj, vk); \ + } else { \ + VMIN_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VSIGNCOVxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVSIGNCOV_##width(vd, vj, vk); \ + } else { \ + VSIGNCOV_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VAVGxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVAVG_##width(vd, vj, vk); \ + } else { \ + VAVG_##width(vd, vj, vk); \ + } \ + } while (0) + +#define VAVGRxy(width, vd, vj, vk) \ + do { \ + if (vex.l) { \ + XVAVGR_##width(vd, vj, vk); \ + } else { \ + VAVGR_##width(vd, vj, vk); \ + } \ + } while (0) #endif //__ARM64_EMITTER_H__ |
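One subtlety in the 0F38 handlers above: VPSIGNB/W/D pass their sources to VSIGNCOVxy as (v0, v2, v1), i.e. Ex before Vx. Assuming VSIGNCOV takes the sign from its first source operand and the value from its second, while x86 PSIGN takes the value from Vx and the sign control from Ex, that operand swap is exactly what makes the two agree. A scalar sanity check of the equivalence (function names are illustrative only):

```c
#include <stdint.h>
#include <assert.h>

/* x86 PSIGNB element: value from a (Vx), sign control from b (Ex). */
static int8_t psignb_elem(int8_t a, int8_t b)
{
    if (b < 0) return (int8_t)(0 - (int)a); /* -(-128) wraps back to -128 */
    if (b == 0) return 0;
    return a;
}

/* LoongArch VSIGNCOV.B element (assumed): sign from vj, magnitude from vk. */
static int8_t vsigncov_b_elem(int8_t vj, int8_t vk)
{
    if (vj < 0) return (int8_t)(0 - (int)vk);
    if (vj == 0) return 0;
    return vk;
}

int main(void)
{
    /* Swapping the sources, as VSIGNCOVxy(B, v0, v2, v1) does, gives the same result. */
    for (int a = -128; a <= 127; a++)
        for (int b = -128; b <= 127; b++)
            assert(psignb_elem((int8_t)a, (int8_t)b) ==
                   vsigncov_b_elem((int8_t)b, (int8_t)a));
    return 0;
}
```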