| author | phorcys <phorcys@126.com> | 2025-07-30 14:34:35 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-30 08:34:35 +0200 |
| commit | 127d72da17a076beb564d0f21f14bbad5b27d755 (patch) | |
| tree | dba11f091edaaec46de0f0791dde374754b913c6 /src | |
| parent | f2baa6ed350362662482056312d20102b4c8c985 (diff) | |
| download | box64-127d72da17a076beb564d0f21f14bbad5b27d755.tar.gz box64-127d72da17a076beb564d0f21f14bbad5b27d755.zip | |
[LA64_DYNAREC] Add la64 avx cmp ops, part3. TEST ops. (#2857)
VPTEST, VTESTPD, VTESTPS
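For reference, the flag semantics these three instructions implement (standard x86 behavior). The C below is an illustrative sketch, not box64 code; names such as `vptest`, `vtestps`, and `test_flags_t` are invented for the example:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { bool zf, cf; } test_flags_t;

/* VPTEST: ZF = ((Gx & Ex) == 0), CF = ((~Gx & Ex) == 0) over the full
 * vector, with the remaining arithmetic flags cleared. Vectors are
 * modeled as arrays of 64-bit lanes (2 for xmm, 4 for ymm). */
static test_flags_t vptest(const uint64_t *gx, const uint64_t *ex, int lanes)
{
    uint64_t and_acc = 0, andn_acc = 0;
    for (int i = 0; i < lanes; i++) {
        and_acc  |= gx[i] & ex[i];   /* AND product  -> ZF */
        andn_acc |= ~gx[i] & ex[i];  /* ANDN product -> CF */
    }
    return (test_flags_t){ and_acc == 0, andn_acc == 0 };
}

/* VTESTPS: identical structure, but only the sign bit of each 32-bit
 * element participates. VTESTPD is the same with 64-bit elements. */
static test_flags_t vtestps(const uint32_t *gx, const uint32_t *ex, int elems)
{
    uint32_t and_sign = 0, andn_sign = 0;
    for (int i = 0; i < elems; i++) {
        and_sign  |= gx[i] & ex[i] & 0x80000000u;
        andn_sign |= ~gx[i] & ex[i] & 0x80000000u;
    }
    return (test_flags_t){ and_sign == 0, andn_sign == 0 };
}

int main(void)
{
    uint64_t g[2] = { 0xff00, 0 }, e[2] = { 0x00ff, 0 };
    test_flags_t f = vptest(g, e, 2);
    printf("ZF=%d CF=%d\n", f.zf, f.cf); /* ZF=1 (disjoint), CF=0 */
    uint32_t gs[4] = { 0x80000000u, 0, 0, 0 }, es[4] = { 0x80000000u, 0, 0, 0 };
    f = vtestps(gs, es, 4);
    printf("ZF=%d CF=%d\n", f.zf, f.cf); /* ZF=0, CF=1 */
    return 0;
}
```

In the diff below, VAND_Vxy/VANDN_Vxy compute the two products, VMSKLTZxy collapses the per-element sign bits for the PS/PD variants (which is why only those paths have it), and VSETEQZ_Vxy feeds the zero test into a branch that sets ZF or CF.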
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 2 |
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_avx_66_0f38.c | 91 |
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 69 |
| -rw-r--r-- | src/dynarec/la64/la64_printer.c | 52 |
4 files changed, 203 insertions, 11 deletions
```diff
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 741fde0d..afc1f848 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -546,7 +546,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             BCEQZ_MARK2(fcc0);
             if (cpuext.lbt) {
                 ADDI_D(x3, xZR, 1 << F_CF);
-                X64_SET_EFLAGS(x3, X_ZF);
+                X64_SET_EFLAGS(x3, X_CF);
             } else {
                 ORI(xFlags, xFlags, 1 << F_CF);
             }
diff --git a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
index aa832385..5e7f064d 100644
--- a/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
+++ b/src/dynarec/la64/dynarec_la64_avx_66_0f38.c
@@ -203,12 +203,103 @@ uintptr_t dynarec64_AVX_66_0F38(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t i
             VSHUFxy(D, d0, v2, v1);
             VOR_Vxy(v0, d0, d0);
             break;
+        case 0x0E:
+        case 0x0F:
+            if (opcode == 0x0E) {
+                INST_NAME("VTESTPS Gx, Ex");
+            } else {
+                INST_NAME("VTESTPD Gx, Ex");
+            }
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETGYxy(q0, 0);
+            GETEYxy(q1, 0, 0);
+            if (!cpuext.lbt) {
+                CLEAR_FLAGS(x3);
+            } else IFX (X_ALL) {
+                X64_SET_EFLAGS(xZR, X_ALL);
+            }
+            SET_DFNONE();
+            v0 = fpu_get_scratch(dyn);
+            IFX (X_ZF) {
+                VAND_Vxy(v0, q1, q0);
+                if (opcode == 0x0E) {
+                    VMSKLTZxy(W, v0, v0);
+                } else {
+                    VMSKLTZxy(D, v0, v0);
+                }
+                VSETEQZ_Vxy(fcc0, v0);
+                BCEQZ_MARK(fcc0);
+                if (cpuext.lbt) {
+                    ADDI_D(x3, xZR, 1 << F_ZF);
+                    X64_SET_EFLAGS(x3, X_ZF);
+                } else {
+                    ORI(xFlags, xFlags, 1 << F_ZF);
+                }
+            }
+            MARK;
+            IFX (X_CF) {
+                VANDN_Vxy(v0, q0, q1);
+                if (opcode == 0x0E) {
+                    VMSKLTZxy(W, v0, v0);
+                } else {
+                    VMSKLTZxy(D, v0, v0);
+                }
+                VSETEQZ_Vxy(fcc0, v0);
+                BCEQZ_MARK2(fcc0);
+                if (cpuext.lbt) {
+                    ADDI_D(x3, xZR, 1 << F_CF);
+                    X64_SET_EFLAGS(x3, X_CF);
+                } else {
+                    ORI(xFlags, xFlags, 1 << F_CF);
+                }
+            }
+            MARK2;
+            break;
         case 0x16:
             INST_NAME("VPERMPS Gx, Vx, Ex");
             nextop = F8;
             GETGY_empty_VYEY_xy(v0, v1, v2, 0);
             XVPERM_W(v0, v2, v1);
             break;
+        case 0x17:
+            INST_NAME("VPTEST Gx, Ex");
+            nextop = F8;
+            SETFLAGS(X_ALL, SF_SET, NAT_FLAGS_NOFUSION);
+            GETGYxy(q0, 0);
+            GETEYxy(q1, 0, 0);
+            if (!cpuext.lbt) {
+                CLEAR_FLAGS(x3);
+            } else IFX (X_ALL) {
+                X64_SET_EFLAGS(xZR, X_ALL);
+            }
+            SET_DFNONE();
+            v0 = fpu_get_scratch(dyn);
+            IFX (X_ZF) {
+                VAND_Vxy(v0, q1, q0);
+                VSETEQZ_Vxy(fcc0, v0);
+                BCEQZ_MARK(fcc0);
+                if (cpuext.lbt) {
+                    ADDI_D(x3, xZR, 1 << F_ZF);
+                    X64_SET_EFLAGS(x3, X_ZF);
+                } else {
+                    ORI(xFlags, xFlags, 1 << F_ZF);
+                }
+            }
+            MARK;
+            IFX (X_CF) {
+                VANDN_Vxy(v0, q0, q1);
+                VSETEQZ_Vxy(fcc0, v0);
+                BCEQZ_MARK2(fcc0);
+                if (cpuext.lbt) {
+                    ADDI_D(x3, xZR, 1 << F_CF);
+                    X64_SET_EFLAGS(x3, X_CF);
+                } else {
+                    ORI(xFlags, xFlags, 1 << F_CF);
+                }
+            }
+            MARK2;
+            break;
         case 0x18:
             INST_NAME("VBROADCASTSS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index aa5399b9..804a28dd 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1235,6 +1235,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VBITCLR_H(vd, vj, vk) EMIT(type_3R(0b01110001000011001, vk, vj, vd))
 #define VBITCLR_W(vd, vj, vk) EMIT(type_3R(0b01110001000011010, vk, vj, vd))
 #define VBITCLR_D(vd, vj, vk) EMIT(type_3R(0b01110001000011011, vk, vj, vd))
+#define VBITCLRI_B(vd, vj, imm3) EMIT(type_2RI3(0b0111001100010000001, imm3, vj, vd))
+#define VBITCLRI_H(vd, vj, imm4) EMIT(type_2RI4(0b011100110001000001, imm4, vj, vd))
+#define VBITCLRI_W(vd, vj, imm5) EMIT(type_2RI5(0b01110011000100001, imm5, vj, vd))
+#define VBITCLRI_D(vd, vj, imm6) EMIT(type_2RI6(0b0111001100010001, imm6, vj, vd))
 #define VBITSET_B(vd, vj, vk) EMIT(type_3R(0b01110001000011100, vk, vj, vd))
 #define VBITSET_H(vd, vj, vk) EMIT(type_3R(0b01110001000011101, vk, vj, vd))
 #define VBITSET_W(vd, vj, vk) EMIT(type_3R(0b01110001000011110, vk, vj, vd))
@@ -1736,16 +1740,16 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVMSKLTZ_D(xd, xj) EMIT(type_2R(0b0111011010011100010011, xj, xd))
 #define XVMSKGEZ_B(xd, xj) EMIT(type_2R(0b0111011010011100010100, xj, xd))
 #define XVMSKNZ_B(xd, xj) EMIT(type_2R(0b0111011010011100011000, xj, xd))
-#define XVSETEQZ_V(cd, xj) EMIT(type_2R(0b011101101001110010011000, xj, cd & 0b111))
-#define XVSETNEZ_V(cd, xj) EMIT(type_2R(0b011101101001110010011100, xj, cd & 0b111))
-#define XVSETANYEQZ_B(cd, xj) EMIT(type_2R(0b011101101001110010100000, xj, cd & 0b111))
-#define XVSETANYEQZ_H(cd, xj) EMIT(type_2R(0b011101101001110010100100, xj, cd & 0b111))
-#define XVSETANYEQZ_W(cd, xj) EMIT(type_2R(0b011101101001110010101000, xj, cd & 0b111))
-#define XVSETANYEQZ_D(cd, xj) EMIT(type_2R(0b011101101001110010101100, xj, cd & 0b111))
-#define XVSETALLNEZ_B(cd, xj) EMIT(type_2R(0b011101101001110010110000, xj, cd & 0b111))
-#define XVSETALLNEZ_H(cd, xj) EMIT(type_2R(0b011101101001110010110100, xj, cd & 0b111))
-#define XVSETALLNEZ_W(cd, xj) EMIT(type_2R(0b011101101001110010111000, xj, cd & 0b111))
-#define XVSETALLNEZ_D(cd, xj) EMIT(type_2R(0b011101101001110010111100, xj, cd & 0b111))
+#define XVSETEQZ_V(cd, xj) EMIT(type_2R(0b0111011010011100100110, xj, cd & 0b111))
+#define XVSETNEZ_V(cd, xj) EMIT(type_2R(0b0111011010011100100111, xj, cd & 0b111))
+#define XVSETANYEQZ_B(cd, xj) EMIT(type_2R(0b0111011010011100101000, xj, cd & 0b111))
+#define XVSETANYEQZ_H(cd, xj) EMIT(type_2R(0b0111011010011100101001, xj, cd & 0b111))
+#define XVSETANYEQZ_W(cd, xj) EMIT(type_2R(0b0111011010011100101010, xj, cd & 0b111))
+#define XVSETANYEQZ_D(cd, xj) EMIT(type_2R(0b0111011010011100101011, xj, cd & 0b111))
+#define XVSETALLNEZ_B(cd, xj) EMIT(type_2R(0b0111011010011100101100, xj, cd & 0b111))
+#define XVSETALLNEZ_H(cd, xj) EMIT(type_2R(0b0111011010011100101101, xj, cd & 0b111))
+#define XVSETALLNEZ_W(cd, xj) EMIT(type_2R(0b0111011010011100101110, xj, cd & 0b111))
+#define XVSETALLNEZ_D(cd, xj) EMIT(type_2R(0b0111011010011100101111, xj, cd & 0b111))
 #define XVFLOGB_S(xd, xj) EMIT(type_2R(0b0111011010011100110001, xj, xd))
 #define XVFLOGB_D(xd, xj) EMIT(type_2R(0b0111011010011100110010, xj, xd))
 #define XVFCLASS_S(xd, xj) EMIT(type_2R(0b0111011010011100110101, xj, xd))
@@ -1912,6 +1916,15 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVSRLNI_H_W(vd, vj, imm5) EMIT(type_2RI5(0b01110111010000001, imm5, vj, vd))
 #define XVSRLI_W(vd, vj, imm5) EMIT(type_2RI5(0b01110111001100001, imm5, vj, vd))
 #define VSETEQZ_V(cd, vj) EMIT(type_2R(0b0111001010011100100110, vj, cd & 0b111))
+#define VSETNEZ_V(cd, vj) EMIT(type_2R(0b0111001010011100100111, vj, cd & 0b111))
+#define VSETANYEQZ_B(cd, vj) EMIT(type_2R(0b0111001010011100101000, vj, cd & 0b111))
+#define VSETANYEQZ_H(cd, vj) EMIT(type_2R(0b0111001010011100101001, vj, cd & 0b111))
+#define VSETANYEQZ_W(cd, vj) EMIT(type_2R(0b0111001010011100101010, vj, cd & 0b111))
+#define VSETANYEQZ_D(cd, vj) EMIT(type_2R(0b0111001010011100101011, vj, cd & 0b111))
+#define VSETALLNEZ_B(cd, vj) EMIT(type_2R(0b0111001010011100101100, vj, cd & 0b111))
+#define VSETALLNEZ_H(cd, vj) EMIT(type_2R(0b0111001010011100101101, vj, cd & 0b111))
+#define VSETALLNEZ_W(cd, vj) EMIT(type_2R(0b0111001010011100101110, vj, cd & 0b111))
+#define VSETALLNEZ_D(cd, vj) EMIT(type_2R(0b0111001010011100101111, vj, cd & 0b111))
 #define VINSGR2VR_B(vd, rj, imm4) EMIT(type_2RI4(0b011100101110101110, imm4, rj, vd))
 #define VINSGR2VR_H(vd, rj, imm3) EMIT(type_2RI3(0b0111001011101011110, imm3, rj, vd))
 #define VINSGR2VR_W(vd, rj, imm2) EMIT(type_2RI2(0b01110010111010111110, imm2, rj, vd))
@@ -3305,4 +3318,40 @@ LSX instruction starts with V, LASX instruction starts with XV.
         }                        \
     } while (0)
 
+
+#define VSETEQZ_Vxy(fcc, vd)     \
+    do {                         \
+        if (vex.l) {             \
+            XVSETEQZ_V(fcc, vd); \
+        } else {                 \
+            VSETEQZ_V(fcc, vd);  \
+        }                        \
+    } while (0)
+
+#define VSETNEZ_Vxy(fcc, vd)     \
+    do {                         \
+        if (vex.l) {             \
+            XVSETNEZ_V(fcc, vd); \
+        } else {                 \
+            VSETNEZ_V(fcc, vd);  \
+        }                        \
+    } while (0)
+
+#define VBITCLRIxy(width, vd, vj, imm)      \
+    do {                                    \
+        if (vex.l) {                        \
+            XVBITCLRI_##width(vd, vj, imm); \
+        } else {                            \
+            VBITCLRI_##width(vd, vj, imm);  \
+        }                                   \
+    } while (0)
+
+#define VMSKLTZxy(width, vd, vj)      \
+    do {                              \
+        if (vex.l) {                  \
+            XVMSKLTZ_##width(vd, vj); \
+        } else {                      \
+            VMSKLTZ_##width(vd, vj);  \
+        }                             \
+    } while (0)
+
 #endif //__LA64_EMITTER_H__
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 99c396cf..2fe47957 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -7620,6 +7620,58 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s", "VFRSQRTE.D", Vt[Rd], Vt[Rj]);
         return buff;
     }
+    if (isMask(opcode, "0111001010011100100111jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETNEZ.V", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101000jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETANYEQZ.B", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101001jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETANYEQZ.H", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101010jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETANYEQZ.W", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101011jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETANYEQZ.D", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101100jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETALLNEZ.B", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101101jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETALLNEZ.H", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101110jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETALLNEZ.W", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001010011100101111jjjjj00ccc", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s", "VSETALLNEZ.D", FCCt[Rc], Vt[Rj]);
+        return buff;
+    }
+    if (isMask(opcode, "0111001100010000001iiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VBITCLRI.B", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "011100110001000001iiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VBITCLRI.H", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011000100001iiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VBITCLRI.W", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "0111001100010001iiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%lx", "VBITCLRI.D", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
     return buff;
 }
```
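A note on the XVSET* correction in la64_emitter.h: each old define was the correct 22-bit opcode with two stray zero bits appended (compare the old `0b011101101001110010011000` with the fixed `0b0111011010011100100110`). Assuming the usual `type_2R` packing of `op << 10 | j << 5 | d` used by the neighboring defines (the helper itself is not shown in this diff), those extra bits shift the opcode two positions too far and the word no longer fits in 32 bits, so the old macros emitted a corrupted encoding. A standalone sketch of the arithmetic:

```c
#include <stdint.h>
#include <stdio.h>

/* Assumed packing, mirroring the emitter's 2R format: opcode in the
 * high bits, vj in bits [9:5], cd/vd in bits [4:0]. */
static uint32_t type_2R(uint32_t op, uint32_t j, uint32_t d)
{
    return (op << 10) | (j << 5) | d;
}

int main(void)
{
    /* Old (broken) XVSETEQZ.V prefix: 24 bits, i.e. the 22-bit opcode
     * with "00" appended. Shifted into place it needs 34 bits, so the
     * emitted 32-bit word was silently mangled. */
    uint64_t old = ((uint64_t)0b011101101001110010011000 << 10) | (1u << 5) | 0u;
    /* Fixed 22-bit prefix from this commit: fits exactly in 32 bits. */
    uint32_t fixed = type_2R(0b0111011010011100100110, 1, 0 & 0b111);
    printf("old word:   0x%llx (overflows 32 bits: %s)\n",
           (unsigned long long)old, old > UINT32_MAX ? "yes" : "no");
    printf("fixed word: 0x%08x\n", fixed);
    return 0;
}
```

The new printer masks reflect the corrected layout: 22 opcode bits, five `j` bits, two fixed zeros, then the 3-bit condition-flag register, matching the `cd & 0b111` in the emitter defines.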