diff options
| author | phorcys <phorcys@126.com> | 2025-07-23 17:01:12 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-23 11:01:12 +0200 |
| commit | e1c1303d285287a9d27af0ea9a82c0673a8e744b (patch) | |
| tree | fe1bcf39dd29579fe60cae3453489ab09c498ccc /src | |
| parent | 4a8a3736622a559dee709fe4769cc64704f5b69e (diff) | |
| download | box64-e1c1303d285287a9d27af0ea9a82c0673a8e744b.tar.gz box64-e1c1303d285287a9d27af0ea9a82c0673a8e744b.zip | |
[LA64_DYNAREC] Add la64 avx float ops VDPP{S,D}, VH{ADD,SUB}{PS,PD} (#2842)
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f.c | 42 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f3a.c | 50 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f2_0f.c | 42 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_f3_0f.c | 8 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 21 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 32 |
6 files changed, 191 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f.c b/src/dynarec/la64/dynarec_la64_avx_66_0f.c index d7e2ecf5..4217a713 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f.c @@ -603,6 +603,48 @@ uintptr_t dynarec64_AVX_66_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, DEFAULT; } break; + case 0x7C: + INST_NAME("VHADDPD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + q0 = fpu_get_scratch(dyn); + VPICKEVxy(D, q0, v2, v1); + VPICKODxy(D, v0, v2, v1); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(D, d0, q0, v0, cUN); + } + VFADDxy(D, v0, q0, v0); + if (!BOX64ENV(dynarec_fastnan)) { + VFCMPxy(D, d1, v0, v0, cUN); + VANDN_Vxy(d0, d0, d1); + VLDIxy(d1, (0b011 << 9) | 0b111111000); + VSLLIxy(D, d1, d1, 48); // broadcast 0xfff8000000000000 + VBITSEL_Vxy(v0, v0, d1, d0); + } + break; + case 0x7D: + INST_NAME("VHSUBPD Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + q0 = fpu_get_scratch(dyn); + VPICKEVxy(D, q0, v2, v1); + VPICKODxy(D, v0, v2, v1); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(D, d0, q0, v0, cUN); + } + VFSUBxy(D, v0, q0, v0); + if (!BOX64ENV(dynarec_fastnan)) { + VFCMPxy(D, d1, v0, v0, cUN); + VANDN_Vxy(d0, d0, d1); + VLDIxy(d1, (0b011 << 9) | 0b111111000); + VSLLIxy(D, d1, d1, 48); // broadcast 0xfff8000000000000 + VBITSEL_Vxy(v0, v0, d1, d0); + } + break; case 0x7E: INST_NAME("VMOVD Ed, Gx"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c index fedc6ec8..beba561f 100644 --- a/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c +++ b/src/dynarec/la64/dynarec_la64_avx_66_0f3a.c @@ -404,6 +404,56 @@ uintptr_t dynarec64_AVX_66_0F3A(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i } } break; + case 0x40: + INST_NAME("VDPPS Gx, Vx, Ex, Ib"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 1); + u8 = F8; + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + d2 = fpu_get_scratch(dyn); + VFMULxy(S, d0, v1, v2); + VXOR_Vxy(d2, d2, d2); + for (int i = 0; i < 4; ++i) { + if (!(u8 & (1 << (4 + i)))) { + VEXTRINSxy(W, d0, d2, (i << 4)); + } + } + VSHUF4Ixy(W, d1, d0, 0b10110001); // v0[a,b,c,d] v1[b,a,d,c] + VFADDxy(S, d0, d0, d1); // v0[ab,ba,cd,dc] + VSHUF4Ixy(W, d1, d0, 0b01001110); // v1[cd,dc,ab,ba] + VFADDxy(S, d0, d0, d1); // v0[abcd,badc,cdab,dcba] + VREPLVEIxy(W, v0, d0, 0); + for (int i = 0; i < 4; ++i) { + if (!(u8 & (1 << i))) { + VEXTRINSxy(W, v0, d2, (i << 4)); + } + } + break; + case 0x41: + INST_NAME("VDPPD Gx, Vx, Ex, Ib"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 1); + u8 = F8; + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + d2 = fpu_get_scratch(dyn); + VFMULxy(D, d0, v1, v2); + VXOR_Vxy(d2, d2, d2); + for (int i = 0; i < 2; ++i) { + if (!(u8 & (1 << (4 + i)))) { + VEXTRINSxy(D, d0, d2, (i << 4)); + } + } + VSHUF4Ixy(W, d1, d0, 0b01001110); // v0[a,b] v1[b,a] + VFADDxy(D, d0, d0, d1); // v0[ab,ba] + VREPLVEIxy(D, v0, d0, 0); + for (int i = 0; i < 2; ++i) { + if (!(u8 & (1 << i))) { + VEXTRINSxy(D, v0, d2, (i << 4)); + } + } + break; case 0x42: INST_NAME("VMPSADBW Gx, Vx, Ex, Ib"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c index 446ed244..2f492c85 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f2_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f2_0f.c @@ -206,6 +206,48 @@ uintptr_t dynarec64_AVX_F2_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, VEXTRINSxy(D, v0, d0, VEXTRINS_IMM_4_0(0, 0)); } break; + case 0x7C: + INST_NAME("VHADDPS Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + q0 = fpu_get_scratch(dyn); + VPICKEVxy(W, q0, v2, v1); + VPICKODxy(W, v0, v2, v1); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(S, d0, q0, v0, cUN); + } + VFADDxy(S, v0, q0, v0); + if (!BOX64ENV(dynarec_fastnan)) { + VFCMPxy(S, d1, v0, v0, cUN); + VANDN_Vxy(d0, d0, d1); + VLDIxy(d1, (0b010 << 9) | 0b1111111100); + VSLLIxy(W, d1, d1, 20); // broadcast 0xFFC00000 + VBITSEL_Vxy(v0, v0, d1, d0); + } + break; + case 0x7D: + INST_NAME("VHSUBPS Gx, Vx, Ex"); + nextop = F8; + GETGY_empty_VYEY_xy(v0, v1, v2, 0); + q0 = fpu_get_scratch(dyn); + VPICKEVxy(W, q0, v2, v1); + VPICKODxy(W, v0, v2, v1); + if (!BOX64ENV(dynarec_fastnan)) { + d0 = fpu_get_scratch(dyn); + d1 = fpu_get_scratch(dyn); + VFCMPxy(S, d0, q0, v0, cUN); + } + VFSUBxy(S, v0, q0, v0); + if (!BOX64ENV(dynarec_fastnan)) { + VFCMPxy(S, d1, v0, v0, cUN); + VANDN_Vxy(d0, d0, d1); + VLDIxy(d1, (0b010 << 9) | 0b1111111100); + VSLLIxy(W, d1, d1, 20); // broadcast 0xFFC00000 + VBITSEL_Vxy(v0, v0, d1, d0); + } + break; case 0xD0: INST_NAME("VADDSUBPS Gx, Vx, Ex"); nextop = F8; diff --git a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c index 2e51ce90..f2ea3acd 100644 --- a/src/dynarec/la64/dynarec_la64_avx_f3_0f.c +++ b/src/dynarec/la64/dynarec_la64_avx_f3_0f.c @@ -104,7 +104,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VADDSS Gx, Vx, Ex"); nextop = F8; GETVYx(v1, 0); - GETEYSD(v2, 0, 0); + GETEYSS(v2, 0, 0); GETGYx_empty(v0); d0 = fpu_get_scratch(dyn); FADD_S(d0, v1, v2); @@ -123,7 +123,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VMULSS Gx, Vx, Ex"); nextop = F8; GETVYx(v1, 0); - GETEYSD(v2, 0, 0); + GETEYSS(v2, 0, 0); GETGYx_empty(v0); d0 = fpu_get_scratch(dyn); FMUL_S(d0, v1, v2); @@ -142,7 +142,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VSUBSS Gx, Vx, Ex"); nextop = F8; GETVYx(v1, 0); - GETEYSD(v2, 0, 0); + GETEYSS(v2, 0, 0); GETGYx_empty(v0); d0 = fpu_get_scratch(dyn); FSUB_S(d0, v1, v2); @@ -161,7 +161,7 @@ uintptr_t dynarec64_AVX_F3_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, INST_NAME("VDIVSS Gx, Vx, Ex"); nextop = F8; GETVYx(v1, 0); - GETEYSD(v2, 0, 0); + GETEYSS(v2, 0, 0); GETGYx_empty(v0); d0 = fpu_get_scratch(dyn); FDIV_S(d0, v1, v2); diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h index 42899386..57feffcf 100644 --- a/src/dynarec/la64/la64_emitter.h +++ b/src/dynarec/la64/la64_emitter.h @@ -2132,6 +2132,14 @@ LSX instruction starts with V, LASX instruction starts with XV. #define XVFRSTPI_H(xd, xj, imm5) EMIT(type_2RI5(0b01110110100110101, imm5, xj, xd)) #define XVLDI(xd, imm13) EMIT(type_1RI13(0b01110111111000, imm13, xd)) #define XVSHUF_B(xd, xj, xk, xa) EMIT(type_4R(0b000011010110, xa, xk, xj, xd)) +#define XVREPLVE_B(xd, xj, rk) EMIT(type_3R(0b01110101001000100, rk, xj, xd)) +#define XVREPLVE_H(xd, xj, rk) EMIT(type_3R(0b01110101001000101, rk, xj, xd)) +#define XVREPLVE_W(xd, xj, rk) EMIT(type_3R(0b01110101001000110, rk, xj, xd)) +#define XVREPLVE_D(xd, xj, rk) EMIT(type_3R(0b01110101001000111, rk, xj, xd)) +#define XVREPLGR2VR_B(xd, rj) EMIT(type_2R(0b0111011010011111000000, rj, xd)) +#define XVREPLGR2VR_H(xd, rj) EMIT(type_2R(0b0111011010011111000001, rj, xd)) +#define XVREPLGR2VR_W(xd, rj) EMIT(type_2R(0b0111011010011111000010, rj, xd)) +#define XVREPLGR2VR_D(xd, rj) EMIT(type_2R(0b0111011010011111000011, rj, xd)) #define XVFMADD_S(xd, xj, xk, xa) EMIT(type_4R(0b000010100001, xa, xk, xj, xd)) #define XVFMSUB_S(xd, xj, xk, xa) EMIT(type_4R(0b000010100101, xa, xk, xj, xd)) @@ -3198,4 +3206,17 @@ LSX instruction starts with V, LASX instruction starts with XV. } \ } while (0) +#define VREPLVEIxy(width, vd, vj, imm) \ + do { \ + if (vex.l) { \ + if (imm > 0) { \ + ADDI_D(x5, xZR, imm); \ + XVREPLVE_##width(vd, vj, x5); \ + } else { \ + XVREPLVE0_##width(vd, vj); \ + } \ + } else { \ + VREPLVEI_##width(vd, vj, imm); \ + } \ + } while (0) #endif //__ARM64_EMITTER_H__ diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c index 6e21b93e..39d4c612 100644 --- a/src/dynarec/la64/la64_printer.c +++ b/src/dynarec/la64/la64_printer.c @@ -7524,6 +7524,38 @@ const char* la64_print(uint32_t opcode, uintptr_t addr) snprintf(buff, sizeof(buff), "%-15s %s, %s, %s, %s", "XVFNMSUB.D", XVt[Rd], XVt[Rj], XVt[Rk], XVt[Ra]); return buff; } + if (isMask(opcode, "01110101001000100kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.B", XVt[Rd], XVt[Rj], Xt[Rk]); + return buff; + } + if (isMask(opcode, "01110101001000101kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.H", XVt[Rd], XVt[Rj], Xt[Rk]); + return buff; + } + if (isMask(opcode, "01110101001000110kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.W", XVt[Rd], XVt[Rj], Xt[Rk]); + return buff; + } + if (isMask(opcode, "01110101001000111kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "XVREPLVE.D", XVt[Rd], XVt[Rj], Xt[Rk]); + return buff; + } + if (isMask(opcode, "0111011010011111000000jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.B", XVt[Rd], Xt[Rj]); + return buff; + } + if (isMask(opcode, "0111011010011111000001jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.H", XVt[Rd], Xt[Rj]); + return buff; + } + if (isMask(opcode, "0111011010011111000010jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.W", XVt[Rd], Xt[Rj]); + return buff; + } + if (isMask(opcode, "0111011010011111000011jjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "%-15s %s, %s", "XVREPLGR2VR.D", XVt[Rd], Xt[Rj]); + return buff; + } snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); return buff; } |