diff options
| author | phorcys <phorcys@126.com> | 2025-08-04 14:57:32 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-04 08:57:32 +0200 |
| commit | 0dc9f8cb62de2877b6f5d480c480b203c5831fa8 (patch) | |
| tree | eef978773f7edb94a9b5d90d6b5342fcdc59f4a6 /src | |
| parent | cb0b274c2704e5af3c118d30992d63c6f5dff6e8 (diff) | |
| download | box64-0dc9f8cb62de2877b6f5d480c480b203c5831fa8.tar.gz box64-0dc9f8cb62de2877b6f5d480c480b203c5831fa8.zip | |
[LA64_DYNAREC] Add la64 avx bit ops. (#2873)
* VEX.66.0F.3A VPEXTRB/VPEXTRW/VPEXTRD/VPEXTRQ VPINSRB/VPINSRD/VPINSRQ * VEX.66.0F.C5 VPEXTRW * VEX.66.0F.C4 VPINSRW * VEX.66.0F.38.41 VPHMINPOSUW
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 25 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 21 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 85 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 2 |
4 files changed, 133 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index 18379800..30b035a0 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -884,6 +884,31 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, case 0x0f: VSEQxy(B, v0, v1, v1); break; // true } break; + case 0xC4: + INST_NAME("VPINSRW Gx, Vx, ED, Ib"); + nextop = F8; + GETEWW(0, x5, 1); + GETVYx(v1, 0); + GETGYx_empty(v0); + u8 = F8; + if(v0 != v1) VOR_V(v0, v1, v1); + VINSGR2VR_H(v0, ed, (u8 & 0x7)); + break; + case 0xC5: + INST_NAME("VPEXTRW Gd, Ex, Ib"); + nextop = F8; + GETGD; + if (MODREG) { + GETEYx(v0, 0, 1); + u8 = (F8) & 7; + VPICKVE2GR_HU(gd, v0, u8); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1); + u8 = (F8) & 7; + LD_HU(gd, wback, (u8 << 1)); + } + break; case 0xC6: INST_NAME("VSHUFPD Gx, Vx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c index 3017b7ae..1f5ebbc7 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c @@ -677,6 +677,27 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i GETGY_empty_VYEY_xy(v0, v1, v2, 0); VMULxy(W, v0, v1, v2); break; + case 0x41: + INST_NAME("VPHMINPOSUW Gx, Ex"); + nextop = F8; + GETEYx(v1, 0, 0); + GETGYx_empty(v0); + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); + q2 = fpu_get_scratch(dyn); + // v1[a,b,c,d,e,f,g,h] + VSHUF4I_W(q0, v1, 0b01001110); // q0[e,f,g,h,a,b,c,d] + VMIN_HU(q1, v1, q0); // q1[ae,bf,cg,dh ...] + + VSHUF4I_H(q2, q1, 0b10110001); // q2[bf,ae,dh,cg ...] + VMIN_HU(q1, q1, q2); // q1[aebf,aebf,cgdh,cgdh ...] 
+ VSHUF4I_H(q0, q1, 0b01001110); // q0[cgdh,cgdh,aebf,aebf] + VMIN_HU(q2, q0, q1); // all lane is min(abcdefgh) + VSEQ_H(q0, q2, v1); // get mask(0xffff) + VFRSTPI_H(q2, q0, 1); // find first neg(0xffff),insert index to q2 + XVPICKVE_W(v0, q2, 0); + YMM_UNMARK_UPPER_ZERO(v0); + break; case 0x45: INST_NAME("VPSRLVD/Q Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c index dbd1bca4..9a90ce0f 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c @@ -386,6 +386,63 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } } break; + case 0x14: + INST_NAME("VPEXTRB Ed, Gx, imm8"); + nextop = F8; + GETGYx(q0, 0); + if (MODREG) { + ed = TO_NAT((nextop & 7) + (rex.b << 3)); + u8 = (F8) & 15; + VPICKVE2GR_BU(ed, q0, u8); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1); + u8 = (F8) & 15; + VSTELM_B(q0, wback, 0, u8); + } + break; + case 0x15: + INST_NAME("VPEXTRW Ed, Gx, imm8"); + nextop = F8; + GETGYx(q0, 0); + if (MODREG) { + ed = TO_NAT((nextop & 7) + (rex.b << 3)); + u8 = (F8) & 7; + VPICKVE2GR_HU(ed, q0, u8); + } else { + SMREAD(); + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x4, &fixedaddress, rex, NULL, 0, 1); + u8 = (F8) & 7; + VSTELM_H(q0, wback, 0, u8); + } + break; + case 0x16: + if (rex.w) { + INST_NAME("VPEXTRQ Ed, Gx, Ib"); + } else { + INST_NAME("VPEXTRD Ed, Gx, Ib"); + } + nextop = F8; + GETGYx(q0, 0); + if (MODREG) { + ed = TO_NAT((nextop & 7) + (rex.b << 3)); + u8 = F8; + if (rex.w) { + VPICKVE2GR_D(ed, q0, (u8 & 1)); + } else { + VPICKVE2GR_WU(ed, q0, (u8 & 3)); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 0, 1); + u8 = F8; + if (rex.w) { + VSTELM_D(q0, ed, 0, (u8 & 1)); + } else { + VSTELM_W(q0, ed, 0, (u8 & 3)); + } + SMWRITE2(); + } + break; case 0x17: INST_NAME("VEXTRACTPS 
Ed, Gx, imm8"); nextop = F8; @@ -471,6 +528,16 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } x87_restoreround(dyn, ninst, u8); break; + case 0x20: + INST_NAME("VPINSRB Gx, Vx, ED, Ib"); + nextop = F8; + GETEB(x5, 1); + GETVYx(v1, 0); + GETGYx_empty(v0); + u8 = F8; + if(v0 != v1) VOR_V(v0, v1, v1); + VINSGR2VR_B(v0, ed, (u8 & 0xf)); + break; case 0x21: INST_NAME("VINSERTPS Gx, Vx, Ex, Ib"); nextop = F8; @@ -506,6 +573,24 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } } break; + case 0x22: + if (rex.w) { + INST_NAME("VPINSRQ Gx, Vx, ED, Ib"); + } else { + INST_NAME("VPINSRD Gx, Vx, ED, Ib"); + } + nextop = F8; + GETED(1); + GETVYx(v1, 0); + GETGYx_empty(v0); + u8 = F8; + if(v0 != v1) VOR_V(v0, v1, v1); + if(rex.w) { + VINSGR2VR_D(v0, ed, (u8 & 0x1)); + } else { + VINSGR2VR_W(v0, ed, (u8 & 0x3)); + } + break; case 0x2A: INST_NAME("VMOVNTDQA Gx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 196126a7..85ad1fd7 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -1250,6 +1250,8 @@ LSX instruction starts with V, LASX instruction starts with XV. #define VBITREV_D(vd, vj, vk) EMIT(type_3R(0b01110001000100011, vk, vj, vd)) #define VFRSTP_B(vd, vj, vk) EMIT(type_3R(0b01110001001010110, vk, vj, vd)) #define VFRSTP_H(vd, vj, vk) EMIT(type_3R(0b01110001001010111, vk, vj, vd)) +#define VFRSTPI_B(vd, vj, imm5) EMIT(type_2RI5(0b01110010100110100, imm5, vj, vd)) +#define VFRSTPI_H(vd, vj, imm5) EMIT(type_2RI5(0b01110010100110101, imm5, vj, vd)) #define VFADD_S(vd, vj, vk) EMIT(type_3R(0b01110001001100001, vk, vj, vd)) #define VFADD_D(vd, vj, vk) EMIT(type_3R(0b01110001001100010, vk, vj, vd)) #define VFSUB_S(vd, vj, vk) EMIT(type_3R(0b01110001001100101, vk, vj, vd)) |