about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-04-28 00:33:58 +0800
committer GitHub <noreply@github.com> 2024-04-27 18:33:58 +0200
commit ae386b3a7d171fb2f307f9e641821f0bbcb76f70 (patch)
tree 2978bfd6196a63c4c6edc622180c3b79f14f6a51 /src/dynarec
parent 2b8a8314c5879473875b339172b2ef2b7b9b7ff6 (diff)
download box64-ae386b3a7d171fb2f307f9e641821f0bbcb76f70.tar.gz
box64-ae386b3a7d171fb2f307f9e641821f0bbcb76f70.zip
[LA64_DYNAREC] Added more opcodes (#1473)
* Added 66 0F 38 00 PSHUFB opcode

* Added 66 0F 38 01 PHADDW opcode

* Added 66 0F 38 02 PHADDD opcode

* Added 66 0F 38 03 PHADDSW opcode

* Added 66 0F 38 04 PMADDUBSW opcode

* Added 66 0F 38 05..0A opcodes

* Upgrade QEMU
Diffstat (limited to 'src/dynarec')
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 122
-rw-r--r-- src/dynarec/la64/la64_emitter.h 35
2 files changed, 157 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 23ad1fc6..68f277bd 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -72,6 +72,128 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0x38: // SSSE3 opcodes
+            nextop = F8;
+            switch (nextop) {
+                case 0x00: // 66 0F 38 00: shuffle the bytes of Gx using the control bytes in Ex
+                    INST_NAME("PSHUFB Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VLDI(v0, 0b0000010001111); // broadcast 0b10001111 as byte
+                    VAND_V(v0, v0, q1);        // keep only bit 7 and the low 4 index bits of each control byte
+                    VMINI_BU(v0, v0, 0x1f);    // unsigned clamp: any byte with bit 7 set (>= 0x80) becomes 0x1f
+                    VXOR_V(v1, v1, v1);        // v1 = 0
+                    VSHUF_B(q0, v1, q0, v0);   // NOTE(review): presumably selectors 0-15 read q0 and 16-31 read the zeroed v1, so 0x1f yields 0 -- confirm vshuf.b source order
+                    break;
+                case 0x01: // 66 0F 38 01: horizontal add of adjacent 16-bit pairs
+                    INST_NAME("PHADDW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0); // presumably even halfwords of q0 (low half) then q1 (high half) -- confirm operand order
+                    VPICKOD_H(v1, q1, q0); // the matching odd halfwords
+                    VADD_H(q0, v0, v1);    // even + odd = sum of each adjacent pair
+                    break;
+                case 0x02: // 66 0F 38 02: horizontal add of adjacent 32-bit pairs
+                    INST_NAME("PHADDD Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_W(v0, q1, q0); // same even/odd split as PHADDW, on 32-bit lanes
+                    VPICKOD_W(v1, q1, q0);
+                    VADD_W(q0, v0, v1);
+                    break;
+                case 0x03: // 66 0F 38 03: horizontal add of adjacent 16-bit pairs, saturating
+                    INST_NAME("PHADDSW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0);
+                    VPICKOD_H(v1, q1, q0);
+                    VSADD_H(q0, v0, v1); // presumably the signed-saturating counterpart of VADD_H -- confirm
+                    break;
+                case 0x04: // 66 0F 38 04: multiply unsigned Gx bytes by signed Ex bytes, add adjacent products with saturation
+                    INST_NAME("PMADDUBSW Gx,Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VEXT2XV_HU_BU(v0, q0); // presumably zero-extends the 16 Gx bytes to 16-bit lanes of a 256-bit register -- confirm
+                    VEXT2XV_H_B(v1, q1);   // presumably sign-extends the 16 Ex bytes the same way -- confirm
+                    XVMUL_H(v0, v0, v1);   // one 16-bit product per byte pair
+                    XVPERMI_Q(v1, v0, 1); // v1[127:0] = v0[255:128];
+                    VPICKEV_H(q0, v1, v0); // even-indexed products
+                    VPICKOD_H(v0, v1, v0); // odd-indexed products
+                    VSADD_H(q0, v0, q0);   // presumably a signed-saturating add of each adjacent product pair -- confirm
+                    break;
+                case 0x05: // 66 0F 38 05: horizontal subtract of adjacent 16-bit pairs
+                    INST_NAME("PHSUBW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0); // presumably even halfwords of q0 (low half) then q1 (high half) -- confirm operand order
+                    VPICKOD_H(v1, q1, q0); // the matching odd halfwords
+                    VSUB_H(q0, v0, v1);    // even - odd for each adjacent pair
+                    break;
+                case 0x06: // 66 0F 38 06: horizontal subtract of adjacent 32-bit pairs
+                    INST_NAME("PHSUBD Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_W(v0, q1, q0); // presumably even dwords of q0 (low half) then q1 (high half) -- confirm operand order
+                    VPICKOD_W(v1, q1, q0); // the matching odd dwords
+                    VSUB_W(q0, v0, v1);    // must be a 32-bit lane subtract: a 64-bit one lets borrows cross dword lanes
+                    break;
+                case 0x07: // 66 0F 38 07: horizontal subtract of adjacent 16-bit pairs, saturating
+                    INST_NAME("PHSUBSW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    v0 = fpu_get_scratch(dyn);
+                    v1 = fpu_get_scratch(dyn);
+                    VPICKEV_H(v0, q1, q0); // presumably even halfwords of q0 (low half) then q1 (high half) -- confirm operand order
+                    VPICKOD_H(v1, q1, q0); // the matching odd halfwords
+                    VSSUB_H(q0, v0, v1);   // presumably the signed-saturating counterpart of VSUB_H -- confirm
+                    break;
+                case 0x08: // 66 0F 38 08: negate/zero/keep each Gx byte according to the sign of the Ex byte
+                    INST_NAME("PSIGNB Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_B(q0, q1, q0); // NOTE(review): presumably the sign comes from q1 (Ex) and the value from q0 (Gx) -- confirm vsigncov operand order
+                    break;
+                case 0x09: // 66 0F 38 09: same operation on 16-bit lanes
+                    INST_NAME("PSIGNW Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_H(q0, q1, q0);
+                    break;
+                case 0x0A: // 66 0F 38 0A: same operation on 32-bit lanes
+                    INST_NAME("PSIGND Gx, Ex");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETEX(q1, 0, 0);
+                    VSIGNCOV_W(q0, q1, q0);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 778e1a83..afe2f5fd 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -160,6 +160,7 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define type_I26(opc, imm26)             ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26 >> 16) & 0x3FF))
 
 // Made-up formats not found in the spec.
+#define type_1RI13(opc, imm13, rd)       ((opc) << 18 | ((imm13) & 0x1FFF) << 5 | (rd)) // imm13 occupies bits 17:5; mask to exactly 13 bits so it cannot reach opc
 #define type_2RI3(opc, imm3, rj, rd)     ((opc) << 13 | ((imm3)  & 0x7 )  << 10 | (rj) << 5 | (rd))
 #define type_2RI4(opc, imm4, rj, rd)     ((opc) << 14 | ((imm4)  & 0xF )  << 10 | (rj) << 5 | (rd))
 #define type_2RI5(opc, imm5, rj, rd)     ((opc) << 15 | ((imm5)  & 0x1F)  << 10 | (rj) << 5 | (rd))
@@ -1000,6 +1001,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VADDA_H(vd, vj, vk)         EMIT(type_3R(0b01110000010111001, vk, vj, vd))
 #define VADDA_W(vd, vj, vk)         EMIT(type_3R(0b01110000010111010, vk, vj, vd))
 #define VADDA_D(vd, vj, vk)         EMIT(type_3R(0b01110000010111011, vk, vj, vd))
+#define VMAXI_B(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100000, imm5, vj, vd)) // type_2RI5 masks imm5 to 5 bits, so a negative immediate cannot spill into the opcode
+#define VMAXI_H(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100001, imm5, vj, vd))
+#define VMAXI_W(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100010, imm5, vj, vd))
+#define VMAXI_D(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100011, imm5, vj, vd))
+#define VMAXI_BU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101000, imm5, vj, vd)) // U-suffixed forms: presumably unsigned compare with an unsigned immediate -- confirm
+#define VMAXI_HU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101001, imm5, vj, vd))
+#define VMAXI_WU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101010, imm5, vj, vd))
+#define VMAXI_DU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101011, imm5, vj, vd))
 #define VMAX_B(vd, vj, vk)          EMIT(type_3R(0b01110000011100000, vk, vj, vd))
 #define VMAX_H(vd, vj, vk)          EMIT(type_3R(0b01110000011100001, vk, vj, vd))
 #define VMAX_W(vd, vj, vk)          EMIT(type_3R(0b01110000011100010, vk, vj, vd))
@@ -1008,6 +1017,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VMAX_HU(vd, vj, vk)         EMIT(type_3R(0b01110000011101001, vk, vj, vd))
 #define VMAX_WU(vd, vj, vk)         EMIT(type_3R(0b01110000011101010, vk, vj, vd))
 #define VMAX_DU(vd, vj, vk)         EMIT(type_3R(0b01110000011101011, vk, vj, vd))
+#define VMINI_B(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100100, imm5, vj, vd)) // type_2RI5 masks imm5 to 5 bits, so a negative immediate cannot spill into the opcode
+#define VMINI_H(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100101, imm5, vj, vd))
+#define VMINI_W(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100110, imm5, vj, vd))
+#define VMINI_D(vd, vj, imm5)       EMIT(type_2RI5(0b01110010100100111, imm5, vj, vd))
+#define VMINI_BU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101100, imm5, vj, vd)) // U-suffixed forms: presumably unsigned compare with an unsigned immediate -- confirm
+#define VMINI_HU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101101, imm5, vj, vd))
+#define VMINI_WU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101110, imm5, vj, vd))
+#define VMINI_DU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100101111, imm5, vj, vd))
 #define VMIN_B(vd, vj, vk)          EMIT(type_3R(0b01110000011100100, vk, vj, vd))
 #define VMIN_H(vd, vj, vk)          EMIT(type_3R(0b01110000011100101, vk, vj, vd))
 #define VMIN_W(vd, vj, vk)          EMIT(type_3R(0b01110000011100110, vk, vj, vd))
@@ -1105,6 +1122,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSIGNCOV_W(vd, vj, vk)      EMIT(type_3R(0b01110001001011110, vk, vj, vd))
 #define VSIGNCOV_D(vd, vj, vk)      EMIT(type_3R(0b01110001001011111, vk, vj, vd))
 #define VAND_V(vd, vj, vk)          EMIT(type_3R(0b01110001001001100, vk, vj, vd))
+#define VLDI(vd, imm13)             EMIT(type_1RI13(0b01110011111000, imm13, vd)) // vector load-immediate; e.g. imm13 = 0b0000010001111 broadcasts the byte 0b10001111
 #define VOR_V(vd, vj, vk)           EMIT(type_3R(0b01110001001001101, vk, vj, vd))
 #define VXOR_V(vd, vj, vk)          EMIT(type_3R(0b01110001001001110, vk, vj, vd))
 #define VNOR_V(vd, vj, vk)          EMIT(type_3R(0b01110001001001111, vk, vj, vd))
@@ -1282,6 +1300,7 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VILVH_H(vd, vj, vk)         EMIT(type_3R(0b01110001000111001, vk, vj, vd))
 #define VILVH_W(vd, vj, vk)         EMIT(type_3R(0b01110001000111010, vk, vj, vd))
 #define VILVH_D(vd, vj, vk)         EMIT(type_3R(0b01110001000111011, vk, vj, vd))
+#define VSHUF_B(vd, vj, vk, va)     EMIT(type_4R(0b000011010101, va, vk, vj, vd)) // 4-register form; NOTE(review): va presumably holds the per-byte selectors over vj:vk -- confirm
 #define VSHUF_H(vd, vj, vk)         EMIT(type_3R(0b01110001011110101, vk, vj, vd))
 #define VSHUF_W(vd, vj, vk)         EMIT(type_3R(0b01110001011110110, vk, vj, vd))
 #define VSHUF_D(vd, vj, vk)         EMIT(type_3R(0b01110001011110111, vk, vj, vd))
@@ -1650,6 +1669,22 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define XVSHUF_W(vd, vj, vk)         EMIT(type_3R(0b01110101011110110, vk, vj, vd))
 #define XVSHUF_D(vd, vj, vk)         EMIT(type_3R(0b01110101011110111, vk, vj, vd))
 #define XVPERM_W(vd, vj, vk)         EMIT(type_3R(0b01110101011111010, vk, vj, vd))
+#define XVPERMI_W(vd, vj, imm8)      EMIT(type_2RI8(0b01110111111001, imm8, vj, vd)) // LASX permute-by-immediate family; imm8 is masked by type_2RI8
+#define XVPERMI_D(vd, vj, imm8)      EMIT(type_2RI8(0b01110111111010, imm8, vj, vd))
+#define XVPERMI_Q(vd, vj, imm8)      EMIT(type_2RI8(0b01110111111011, imm8, vj, vd)) // 128-bit granularity; e.g. imm8 = 1 gives vd[127:0] = vj[255:128]
+
+#define VEXT2XV_H_B(vd, vj)          EMIT(type_2R(0b0111011010011111000100, vj, vd)) // NOTE(review): names read as <dst>_<src> element widening, unsuffixed = signed -- confirm against the LASX manual
+#define VEXT2XV_W_B(vd, vj)          EMIT(type_2R(0b0111011010011111000101, vj, vd))
+#define VEXT2XV_D_B(vd, vj)          EMIT(type_2R(0b0111011010011111000110, vj, vd))
+#define VEXT2XV_W_H(vd, vj)          EMIT(type_2R(0b0111011010011111000111, vj, vd))
+#define VEXT2XV_D_H(vd, vj)          EMIT(type_2R(0b0111011010011111001000, vj, vd))
+#define VEXT2XV_D_W(vd, vj)          EMIT(type_2R(0b0111011010011111001001, vj, vd))
+#define VEXT2XV_HU_BU(vd, vj)        EMIT(type_2R(0b0111011010011111001010, vj, vd)) // U-suffixed variants: presumably zero-extending -- confirm
+#define VEXT2XV_WU_BU(vd, vj)        EMIT(type_2R(0b0111011010011111001011, vj, vd))
+#define VEXT2XV_DU_BU(vd, vj)        EMIT(type_2R(0b0111011010011111001100, vj, vd))
+#define VEXT2XV_WU_HU(vd, vj)        EMIT(type_2R(0b0111011010011111001101, vj, vd))
+#define VEXT2XV_DU_HU(vd, vj)        EMIT(type_2R(0b0111011010011111001110, vj, vd))
+#define VEXT2XV_DU_WU(vd, vj)        EMIT(type_2R(0b0111011010011111001111, vj, vd))
 
 ////////////////////////////////////////////////////////////////////////////////
 // (undocumented) LBT extension instructions