diff options
| author | Yang Liu <numbksco@gmail.com> | 2024-04-28 00:33:58 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-27 18:33:58 +0200 |
| commit | ae386b3a7d171fb2f307f9e641821f0bbcb76f70 (patch) | |
| tree | 2978bfd6196a63c4c6edc622180c3b79f14f6a51 /src/dynarec | |
| parent | 2b8a8314c5879473875b339172b2ef2b7b9b7ff6 (diff) | |
| download | box64-ae386b3a7d171fb2f307f9e641821f0bbcb76f70.tar.gz box64-ae386b3a7d171fb2f307f9e641821f0bbcb76f70.zip | |
[LA64_DYNAREC] Added more opcodes (#1473)
* Added 66 0F PSHUFB opcode * Added 66 0F PHADDW opcode * Added 66 0F 38 02 PHADDD opcode * Added 66 0F 38 03 PHADDSW opcode * Added 66 0F 38 04 PMADDUBSW opcode * Added 66 0F 38 05..0A opcodes * Upgrade QEMU
Diffstat (limited to 'src/dynarec')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 122 | ||||
| -rw-r--r-- | src/dynarec/la64/la64_emitter.h | 35 |
2 files changed, 157 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 23ad1fc6..68f277bd 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -72,6 +72,128 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0x38: // SSSE3 opcodes
+            nextop = F8; // third opcode byte selects the SSSE3 instruction; each case then reads ModRM
+            switch (nextop) {
+                case 0x00:
+                    INST_NAME("PSHUFB Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VLDI(v0, 0b0000010001111); // broadcast 0b10001111 as byte
+                    VAND_V(v0, v0, q1);        // keep bit 7 (zeroing flag) and bits 3:0 (index) of each selector byte
+                    VMINI_BU(v0, v0, 0x1f);    // selectors with bit 7 set are clamped down to 31
+                    VXOR_V(v1, v1, v1);        // v1 = 0
+                    VSHUF_B(q0, v1, q0, v0);   // NOTE(review): relies on index 31 selecting a byte of the zeroed v1 -- confirm against VSHUF.B
+                    break;
+                case 0x01:
+                    INST_NAME("PHADDW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0); // NOTE(review): expected to gather the even 16-bit lanes of Ex:Gx -- confirm
+                    VPICKOD_H(v1, q1, q0); // NOTE(review): expected to gather the odd 16-bit lanes of Ex:Gx -- confirm
+                    VADD_H(q0, v0, v1);
+                    break;
+                case 0x02:
+                    INST_NAME("PHADDD Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_W(v0, q1, q0);
+                    VPICKOD_W(v1, q1, q0);
+                    VADD_W(q0, v0, v1);
+                    break;
+                case 0x03:
+                    INST_NAME("PHADDSW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0);
+                    VPICKOD_H(v1, q1, q0);
+                    VSADD_H(q0, v0, v1); // saturating variant of the PHADDW sequence
+                    break;
+                case 0x04:
+                    INST_NAME("PMADDUBSW Gx,Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VEXT2XV_HU_BU(v0, q0); // Gx bytes are the unsigned operand
+                    VEXT2XV_H_B(v1, q1);   // Ex bytes are the signed operand
+                    XVMUL_H(v0, v0, v1);
+                    XVPERMI_Q(v1, v0, 1); // v1[127:0] = v0[255:128];
+                    VPICKEV_H(q0, v1, v0);
+                    VPICKOD_H(v0, v1, v0);
+                    VSADD_H(q0, v0, q0); // saturating add of the two picked product vectors
+                    break;
+                case 0x05:
+                    INST_NAME("PHSUBW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0);
+                    VPICKOD_H(v1, q1, q0);
+                    VSUB_H(q0, v0, v1);
+                    break;
+                case 0x06:
+                    INST_NAME("PHSUBD Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_W(v0, q1, q0);
+                    VPICKOD_W(v1, q1, q0);
+                    VSUB_W(q0, v0, v1); // 32-bit lanes, matching the _W picks above and PHADDD's VADD_W; a _D subtract would borrow across dwords
+                    break;
+                case 0x07:
+                    INST_NAME("PHSUBSW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0);
+                    VPICKOD_H(v1, q1, q0);
+                    VSSUB_H(q0, v0, v1); // saturating variant of the PHSUBW sequence
+                    break;
+                case 0x08:
+                    INST_NAME("PSIGNB Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_B(q0, q1, q0); // NOTE(review): expected to apply the sign of each Ex byte to Gx -- confirm operand roles
+                    break;
+                case 0x09:
+                    INST_NAME("PSIGNW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_H(q0, q1, q0);
+                    break;
+                case 0x0A:
+                    INST_NAME("PSIGND Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_W(q0, q1, q0);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 778e1a83..afe2f5fd 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -160,6 +160,7 @@ f24-f31 fs0-fs7 Static registers Callee
#define type_I26(opc, imm26) ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26 >> 16) & 0x3FF))

// Made-up formats not found in the spec.
+#define type_1RI13(opc, imm13, rd) ((opc) << 18 | ((imm13) & 0x1FFF) << 5 | (rd)) // 13-bit mask: imm13 fills bits 17:5 and must not reach the opcode at bit 18
#define type_2RI3(opc, imm3, rj, rd) ((opc) << 13 | ((imm3) & 0x7 ) << 10 | (rj) << 5 | (rd))
#define type_2RI4(opc, imm4, rj, rd) ((opc) << 14 | ((imm4) & 0xF ) << 10 | (rj) << 5 | (rd))
#define type_2RI5(opc, imm5, rj, rd) ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd))
@@ -1000,6 +1001,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VADDA_H(vd, vj, vk) EMIT(type_3R(0b01110000010111001, vk, vj, vd))
#define VADDA_W(vd, vj, vk) EMIT(type_3R(0b01110000010111010, vk, vj, vd))
#define VADDA_D(vd, vj, vk) EMIT(type_3R(0b01110000010111011, vk, vj, vd))
+#define VMAXI_B(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100000, imm5, vj, vd)) // type_2RI5 masks imm5 to 5 bits, so a negative immediate cannot corrupt the opcode
+#define VMAXI_H(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100001, imm5, vj, vd))
+#define VMAXI_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100010, imm5, vj, vd))
+#define VMAXI_D(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100011, imm5, vj, vd))
+#define VMAXI_BU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101000, imm5, vj, vd)) // *_BU/HU/WU/DU: unsigned-element variants
+#define VMAXI_HU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101001, imm5, vj, vd))
+#define VMAXI_WU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101010, imm5, vj, vd))
+#define VMAXI_DU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101011, imm5, vj, vd))
#define VMAX_B(vd, vj, vk) EMIT(type_3R(0b01110000011100000, vk, vj, vd))
#define VMAX_H(vd, vj, vk) EMIT(type_3R(0b01110000011100001, vk, vj, vd))
#define VMAX_W(vd, vj, vk) EMIT(type_3R(0b01110000011100010, vk, vj, vd))
@@ -1008,6 +1017,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VMAX_HU(vd, vj, vk) EMIT(type_3R(0b01110000011101001, vk, vj, vd))
#define VMAX_WU(vd, vj, vk) EMIT(type_3R(0b01110000011101010, vk, vj, vd))
#define VMAX_DU(vd, vj, vk) EMIT(type_3R(0b01110000011101011, vk, vj, vd))
+#define VMINI_B(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100100, imm5, vj, vd)) // type_2RI5 masks imm5 to 5 bits, so a negative immediate cannot corrupt the opcode
+#define VMINI_H(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100101, imm5, vj, vd))
+#define VMINI_W(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100110, imm5, vj, vd))
+#define VMINI_D(vd, vj, imm5)  EMIT(type_2RI5(0b01110010100100111, imm5, vj, vd))
+#define VMINI_BU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101100, imm5, vj, vd)) // *_BU/HU/WU/DU: unsigned-element variants
+#define VMINI_HU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101101, imm5, vj, vd))
+#define VMINI_WU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101110, imm5, vj, vd))
+#define VMINI_DU(vd, vj, imm5) EMIT(type_2RI5(0b01110010100101111, imm5, vj, vd))
#define VMIN_B(vd, vj, vk) EMIT(type_3R(0b01110000011100100, vk, vj, vd))
#define VMIN_H(vd, vj, vk) EMIT(type_3R(0b01110000011100101, vk, vj, vd))
#define VMIN_W(vd, vj, vk) EMIT(type_3R(0b01110000011100110, vk, vj, vd))
@@ -1105,6 +1122,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VSIGNCOV_W(vd, vj, vk) EMIT(type_3R(0b01110001001011110, vk, vj, vd))
#define VSIGNCOV_D(vd, vj, vk) EMIT(type_3R(0b01110001001011111, vk, vj, vd))
#define VAND_V(vd, vj, vk) EMIT(type_3R(0b01110001001001100, vk, vj, vd))
+#define VLDI(vd, imm13) EMIT(type_1RI13(0b01110011111000, imm13, vd)) // load-immediate: one destination register plus a 13-bit immediate
#define VOR_V(vd, vj, vk) EMIT(type_3R(0b01110001001001101, vk, vj, vd))
#define VXOR_V(vd, vj, vk) EMIT(type_3R(0b01110001001001110, vk, vj, vd))
#define VNOR_V(vd, vj, vk) EMIT(type_3R(0b01110001001001111, vk, vj, vd))
@@ -1282,6 +1300,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define VILVH_H(vd, vj, vk) EMIT(type_3R(0b01110001000111001, vk, vj, vd))
#define VILVH_W(vd, vj, vk) EMIT(type_3R(0b01110001000111010, vk, vj, vd))
#define VILVH_D(vd, vj, vk) EMIT(type_3R(0b01110001000111011, vk, vj, vd))
+#define VSHUF_B(vd, vj, vk, va) EMIT(type_4R(0b000011010101, va, vk, vj, vd)) // unlike VSHUF_H/W/D, takes a fourth register (va) and uses the 4R encoding
#define VSHUF_H(vd, vj, vk) EMIT(type_3R(0b01110001011110101, vk, vj, vd))
#define VSHUF_W(vd, vj, vk) EMIT(type_3R(0b01110001011110110, vk, vj, vd))
#define VSHUF_D(vd, vj, vk) EMIT(type_3R(0b01110001011110111, vk, vj, vd))
@@ -1650,6 +1669,22 @@ LSX instruction starts with V, LASX instruction starts with XV.
#define XVSHUF_W(vd, vj, vk) EMIT(type_3R(0b01110101011110110, vk, vj, vd))
#define XVSHUF_D(vd, vj, vk) EMIT(type_3R(0b01110101011110111, vk, vj, vd))
#define XVPERM_W(vd, vj, vk) EMIT(type_3R(0b01110101011111010, vk, vj, vd))
+#define XVPERMI_W(vd, vj, imm8) EMIT(type_2RI8(0b01110111111001, imm8, vj, vd)) // immediate-controlled permutes: imm8 is the selector
+#define XVPERMI_D(vd, vj, imm8) EMIT(type_2RI8(0b01110111111010, imm8, vj, vd))
+#define XVPERMI_Q(vd, vj, imm8) EMIT(type_2RI8(0b01110111111011, imm8, vj, vd)) // the PMADDUBSW translation uses imm8 = 1 for vd[127:0] = vj[255:128]
+
+#define VEXT2XV_H_B(vd, vj)   EMIT(type_2R(0b0111011010011111000100, vj, vd)) // NOTE(review): presumably sign-extending widening (here byte -> halfword) -- confirm against the LASX manual
+#define VEXT2XV_W_B(vd, vj)   EMIT(type_2R(0b0111011010011111000101, vj, vd))
+#define VEXT2XV_D_B(vd, vj)   EMIT(type_2R(0b0111011010011111000110, vj, vd))
+#define VEXT2XV_W_H(vd, vj)   EMIT(type_2R(0b0111011010011111000111, vj, vd))
+#define VEXT2XV_D_H(vd, vj)   EMIT(type_2R(0b0111011010011111001000, vj, vd))
+#define VEXT2XV_D_W(vd, vj)   EMIT(type_2R(0b0111011010011111001001, vj, vd))
+#define VEXT2XV_HU_BU(vd, vj) EMIT(type_2R(0b0111011010011111001010, vj, vd)) // NOTE(review): presumably the zero-extending (unsigned) counterparts -- confirm
+#define VEXT2XV_WU_BU(vd, vj) EMIT(type_2R(0b0111011010011111001011, vj, vd))
+#define VEXT2XV_DU_BU(vd, vj) EMIT(type_2R(0b0111011010011111001100, vj, vd))
+#define VEXT2XV_WU_HU(vd, vj) EMIT(type_2R(0b0111011010011111001101, vj, vd))
+#define VEXT2XV_DU_HU(vd, vj) EMIT(type_2R(0b0111011010011111001110, vj, vd))
+#define VEXT2XV_DU_WU(vd, vj) EMIT(type_2R(0b0111011010011111001111, vj, vd))

////////////////////////////////////////////////////////////////////////////////
// (undocumented) LBT extension instructions
|