| author | phorcys <phorcys@126.com> | 2025-08-02 20:42:05 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-08-02 14:42:05 +0200 |
| commit | cb0b274c2704e5af3c118d30992d63c6f5dff6e8 (patch) | |
| tree | 34f9be4ccb43e85a8f2ed29f8f3ae26f7a668dcb /src | |
| parent | ebac5834410781a5c263e50d987edf65bcbb9d8c (diff) | |
| download | box64-cb0b274c2704e5af3c118d30992d63c6f5dff6e8.tar.gz box64-cb0b274c2704e5af3c118d30992d63c6f5dff6e8.zip | |
[LA64_DYNAREC] Fix some la64 avx/sse ops. (#2882)
- Fix 66.0F.F3 PSLLQ
- Fix VEX.66.0F.7E VMOVD not zero-extending
- Fix VEX.66.0F.3A.06 VPERM2F128/VPERM2I128
- Fix VEX.66.0F.3A.0D VBLENDPD
- Fix VEX.66.0F.3A.18/38 VINSERTF128/VINSERTI128 when q0 == q1 or q0 == q2
- Fix VEX.66.0F.3A.21 VINSERTPS imm8 (u8) fetch position
- Fix VEX.66.0F.3A.40 VDPPS
- Fix VREPLVEIxy emit when vex.l
- Fix VEX.66.0F.38.0C VPERMILPS
- Fix VEX.66.0F.38.2B VPACKUSDW
- Fix VEX.66.0F.38.93 VGATHERQPD
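For context on the first two items: x86 PSLLQ shifts each 64-bit lane left by the count held in the low quadword of the source and yields zero for any count above 63 (which is what the `VSLE_DU` mask in the first hunk below implements), and VMOVD to a 64-bit register must zero-extend, hence `VPICKVE2GR_WU` instead of the sign-extending `VPICKVE2GR_W`. A minimal scalar sketch of those semantics (a reference model only, not box64 code):

```c
#include <stdint.h>
#include <stdio.h>

// Reference model for PSLLQ: each 64-bit lane is shifted left by the
// count in the low quadword of the source; counts > 63 zero the lane
// entirely (C's << is undefined past 63, hence the explicit guard).
static uint64_t psllq_lane(uint64_t lane, uint64_t count) {
    return (count > 63) ? 0 : lane << count;
}

// Reference model for VMOVD r64, xmm: the low 32 bits move over and the
// upper 32 bits of the destination are cleared (zero-extension), which
// is what a sign-extending extract gets wrong for values >= 0x80000000.
static uint64_t vmovd_to_gpr(uint32_t low32) {
    return (uint64_t)low32; // zero-extended by the cast
}

int main(void) {
    printf("%016llx\n", (unsigned long long)psllq_lane(1, 70));         // 0000000000000000
    printf("%016llx\n", (unsigned long long)vmovd_to_gpr(0x80000000u)); // 0000000080000000
    return 0;
}
```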
Diffstat (limited to 'src')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 2 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 2 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 18 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 27 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 24 |
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 44 |
6 files changed, 80 insertions, 37 deletions
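A note before the diff on its trickiest hunk, VINSERTPS: imm8[7:6] selects the source dword (register source only), imm8[5:4] selects the destination dword, and imm8[3:0] is a zero mask applied afterwards; the bug was decoding these fields from a `u8` read at the wrong position in the instruction stream. A hedged scalar model of that immediate layout (illustrative names, not the box64 API):

```c
#include <stdint.h>

// Scalar model of VINSERTPS dst, src1, src2, imm8 (register-source form).
// imm8[7:6] = source dword index, imm8[5:4] = destination dword index,
// imm8[3:0] = zero mask cleared after the insert. With a memory source
// the source index is 0, matching the "src index is zero" comment below.
static void vinsertps_model(uint32_t dst[4], const uint32_t src1[4],
                            const uint32_t src2[4], uint8_t imm8) {
    uint8_t src_index = (imm8 >> 6) & 3;
    uint8_t dst_index = (imm8 >> 4) & 3;
    uint8_t zmask = imm8 & 0xf;
    for (int i = 0; i < 4; i++) dst[i] = src1[i];
    dst[dst_index] = src2[src_index];
    for (int i = 0; i < 4; i++)
        if (zmask & (1 << i)) dst[i] = 0;
}

int main(void) {
    uint32_t a[4] = { 1, 2, 3, 4 }, b[4] = { 9, 9, 9, 9 }, d[4];
    vinsertps_model(d, a, b, 0x1A); // src 0, dst 1, zero lanes 1 and 3
    // d = {1, 0, 3, 0}: lane 1 received b[0], then the zmask cleared it
    return d[1] == 0 ? 0 : 1;
}
```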
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index afc1f848..3e0080a3 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -2519,7 +2519,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             v1 = fpu_get_scratch(dyn);
             VREPLVEI_D(v0, q1, 0);
             VLDI(v1, (0b011 << 10) | 0x3f);
-            VSLEI_DU(v1, v0, v1);
+            VSLE_DU(v1, v0, v1);
             VSLL_D(q0, q0, v0);
             VAND_V(q0, q0, v1);
             break;
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
index eac332ff..18379800 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c
@@ -827,7 +827,7 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip,
                 if (rex.w) {
                     VPICKVE2GR_D(ed, v0, 0);
                 } else {
-                    VPICKVE2GR_W(ed, v0, 0);
+                    VPICKVE2GR_WU(ed, v0, 0);
                 }
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index 037d49a6..3017b7ae 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -187,7 +187,6 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             INST_NAME("VPERMILPS Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
-            u8 = F8;
             d0 = fpu_get_scratch(dyn);
             VANDIxy(d0, v2, 0b11);
             VSHUFxy(W, d0, v1, v1);
@@ -457,17 +456,17 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             q0 = fpu_get_scratch(dyn);
             q1 = fpu_get_scratch(dyn);
             d0 = fpu_get_scratch(dyn);
-            VLDIxy(q0, 0b0010011111111); // broadcast 0xff as 16-bit elements to all lanes
+            VLDIxy(d0, (0b10111 << 8) | 0x00); // Broadcast 0x0000FFFF as 32bits to all lane
             if (v1 == v2) {
-                VMAXIxy(W, v0, v1, 0);
-                VMINxy(W, v0, v1, q0);
-                VPICKEVxy(H, v0, v0, v0);
+                VMAXIxy(W, q0, v1, 0);
+                VMINxy(W, q0, q0, d0);
+                VPICKEVxy(H, v0, q0, q0);
             } else {
                 VMAXIxy(W, q1, v2, 0);
-                VMAXIxy(W, v0, v1, 0);
-                VMINxy(W, q1, q1, q0);
-                VMINxy(W, v0, v0, q0);
-                VPICKEVxy(H, v0, q1, v0);
+                VMAXIxy(W, q0, v1, 0);
+                VMINxy(W, q1, q1, d0);
+                VMINxy(W, q0, q0, d0);
+                VPICKEVxy(H, v0, q1, q0);
             }
             break;
         case 0x2C:
@@ -980,7 +979,6 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 }
                 VXOR_V(v2, v2, v2);
             }
-            XVPERMI_Q(v0, v2, XVPERMI_IMM_4_0(1, 2));
             break;
         case 0x96:
             INST_NAME("VFMADDSUB132PS/D Gx, Vx, Ex");
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
index ccfe759c..dbd1bca4 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c
@@ -140,8 +140,8 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             }
             nextop = F8;
             if (!vex.l) EMIT(0);
-            u8 = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 1);
+            u8 = F8;
             if (u8 == 0x88) {
                 XVXOR_V(v0, v0, v0);
                 break;
@@ -296,7 +296,7 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 XVPERMI_Q(d0, d1, XVPERMI_IMM_4_0(1, 2));
                 XVOR_V(v0, d0, d0);
             } else {
-                u8 = F8 & 0b11;
+                u8 = u8 & 0b11;
                 switch (u8) {
                     case 0b00:
                         VOR_V(v0, v1, v1);
@@ -411,8 +411,12 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             nextop = F8;
             GETGY_empty_VYEY_xy(q0, q1, q2, 1);
             u8 = F8;
-            XVOR_V(q0, q1, q1);
-            XVPERMI_Q(q0, q2, (u8 & 1) == 0 ? 0b00110000 : 0b00000010);
+            if (q0 != q2) {
+                if (q0 != q1) XVOR_V(q0, q1, q1);
+                XVPERMI_Q(q0, q2, ((u8 & 1) == 0) ? 0x30 : 0x02);
+            } else {
+                XVPERMI_Q(q0, q1, ((u8 & 1) == 0) ? 0x12 : 0x20);
+            }
             break;
         case 0x19:
         case 0x39:
@@ -470,9 +474,6 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
         case 0x21:
            INST_NAME("VINSERTPS Gx, Vx, Ex, Ib");
             nextop = F8;
-            uint8_t src_index = (u8 >> 6) & 3;
-            uint8_t dst_index = (u8 >> 4) & 3;
-            uint8_t zmask = u8 & 0xf;
             q1 = fpu_get_scratch(dyn);
             if (MODREG) {
                 GETGY_empty_VYEY_xy(v0, v1, v2, 1);
@@ -480,24 +481,24 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
                 if (v0 == v2) {
                     VOR_V(q1, v2, v2);
                     if (v0 != v1) VOR_V(v0, v1, v1);
-                    VEXTRINS_W(v0, q1, VEXTRINS_IMM_4_0(dst_index, src_index));
+                    VEXTRINS_W(v0, q1, VEXTRINS_IMM_4_0((u8 >> 4) & 3, (u8 >> 6) & 3));
                 } else {
                     if (v0 != v1) VOR_V(v0, v1, v1);
-                    VEXTRINS_W(v0, v2, VEXTRINS_IMM_4_0(dst_index, src_index));
+                    VEXTRINS_W(v0, v2, VEXTRINS_IMM_4_0((u8 >> 4) & 3, (u8 >> 6) & 3));
                 }
             } else {
                 GETVYx(v1, 0);
                 GETGYx_empty(v0);
-                u8 = F8;
                 if (v0 != v1) VOR_V(v0, v1, v1);
                 SMREAD();
-                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x5, &fixedaddress, rex, NULL, 0, 1);
+                addr = geted(dyn, addr, ninst, nextop, &wback, x3, x5, &fixedaddress, rex, NULL, 1, 1);
                 u8 = F8;
                 FLD_S(q1, wback, fixedaddress);
-                VEXTRINS_W(v0, q1, VEXTRINS_IMM_4_0(dst_index, 0)); // src index is zero when Ex is mem operand
+                VEXTRINS_W(v0, q1, VEXTRINS_IMM_4_0((u8 >> 4) & 3, 0)); // src index is zero when Ex is mem operand
             }
-            VXOR_V(q1, q1, q1);
+            uint8_t zmask = u8 & 0xf;
             if (zmask) {
+                VXOR_V(q1, q1, q1);
                 for (uint8_t i = 0; i < 4; i++) {
                     if (zmask & (1 << i)) {
                         VEXTRINS_W(v0, q1, VEXTRINS_IMM_4_0(i, 0));
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 7552a1f9..196126a7 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -3288,18 +3288,18 @@ LSX instruction starts with V, LASX instruction starts with XV.
     }                                      \
     } while (0)
 
-#define VREPLVEIxy(width, vd, vj, imm)     \
-    do {                                   \
-        if (vex.l) {                       \
-            if (imm > 0) {                 \
-                ADDI_D(x5, xZR, imm);      \
-                XVREPLVE_##width(vd, vj, x5); \
-            } else {                       \
-                XVREPLVE0_##width(vd, vj); \
-            }                              \
-        } else {                           \
-            VREPLVEI_##width(vd, vj, imm); \
-        }                                  \
+#define VREPLVEIxy(width, vd, vj, imm)      \
+    do {                                    \
+        if (vex.l) {                        \
+            if (imm > 0) {                  \
+                ADDI_D(x5, xZR, imm);       \
+                XVREPLVE_##width(vd, vj, x5); \
+            } else {                        \
+                XVREPLVE_##width(vd, vj, xZR); \
+            }                               \
+        } else {                            \
+            VREPLVEI_##width(vd, vj, imm);  \
+        }                                   \
     } while (0)
 
 #define VSEQxy(width, vd, vj, vk) \
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 2fe47957..3b984d32 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -7672,6 +7672,50 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VBITCLRI.D", Vt[Rd], Vt[Rj], imm);
         return buff;
     }
+    if (isMask(opcode, "01110010100110100iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VFRSTPI.B", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100110101iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VFRSTPI.H", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100100000iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.B", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100100001iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.H", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100100010iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.W", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100100011iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.D", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100101000iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.BU", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100101001iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.HU", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100101010iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.WU", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110010100101011iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VMAXI.DU", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011111000iiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, 0x%lx", "VLDI", Vt[Rd], imm);
+        return buff;
+    }
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
 }
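The VINSERTF128/VINSERTI128 hunk above is about register aliasing: when the destination q0 is the same LASX register as a source, unconditionally copying q1 into q0 would clobber an input before XVPERMI_Q reads it, so the emitted sequence now depends on which registers alias. A scalar sketch of the x86-level operation being implemented (lanes modeled as two uint64_t each; illustrative only, not box64 code):

```c
#include <stdint.h>

// Scalar model of VINSERTF128/VINSERTI128 ymm, ymm, xmm/m128, imm8:
// the result is the first source with the 128-bit lane selected by
// imm8 bit 0 replaced by the second source. Pass-by-value makes this
// model aliasing-safe; the JIT has to order its register moves instead.
typedef struct { uint64_t q[4]; } ymm_t;

static ymm_t vinsertf128_model(ymm_t src1, const uint64_t src2[2], uint8_t imm8) {
    ymm_t r = src1;
    int lane = imm8 & 1;      // which 128-bit half gets replaced
    r.q[2 * lane + 0] = src2[0];
    r.q[2 * lane + 1] = src2[1];
    return r;
}

int main(void) {
    ymm_t y = { { 1, 2, 3, 4 } };
    uint64_t x[2] = { 7, 8 };
    ymm_t r = vinsertf128_model(y, x, 1); // replace the upper lane
    // r.q = {1, 2, 7, 8}
    return r.q[2] == 7 ? 0 : 1;
}
```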