about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-04-27 01:13:06 +0800
committer GitHub <noreply@github.com> 2024-04-26 19:13:06 +0200
commit c40096e128b9d03f56baae98f0ad6d3e85258c26 (patch)
tree 32ad3ba640d97d94cdf1cf519c7d23b303daace7 /src
parent 43d281740778f959f94165386545363591c2f8ea (diff)
download box64-c40096e128b9d03f56baae98f0ad6d3e85258c26.tar.gz
download box64-c40096e128b9d03f56baae98f0ad6d3e85258c26.zip
[LA64_DYNAREC] Added more opcodes (#1468)
* Added 0F BF MOVSX opcode

* Added F7 /5 IMUL opcode

* Added 86 XCHG opcode

* Added 66 0F D4 PADDQ opcode

* Added 0F C6 SHUFPS opcode

* Added 66 0F 69 PUNPCKHWD opcode

* Added 66 0F DB PAND opcode

* Test

* Review

* Review
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 42
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 25
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 22
-rw-r--r-- src/dynarec/la64/la64_emitter.h 79
-rw-r--r-- src/dynarec/la64/la64_printer.c 17
5 files changed, 174 insertions, 11 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 706094db..3390c6ca 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -9,6 +9,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -726,6 +727,25 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_test32(dyn, ninst, rex, ed, gd, x3, x4, x5);
             break;
+        case 0x86:
+            INST_NAME("(LOCK)XCHG Eb, Gb");
+            nextop = F8;
+            if (MODREG) {
+                GETGB(x1);
+                GETEB(x2, 0);
+                BSTRINS_D(wback, gd, wb2 + 7, wb2);
+                BSTRINS_D(gb1, ed, gb2 + 7, gb2);
+            } else {
+                GETGB(x3);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                // AMSWAP_DB_B(x1, gd, ed);
+                SMDMB();
+                LD_BU(x1, ed, 0);
+                ST_B(gd, ed, 0);
+                SMDMB();
+                BSTRINS_D(gb1, x1, gb2 + 7, gb2);
+            }
+            break;
         case 0x87:
             INST_NAME("(LOCK) XCHG Ed, Gd");
             nextop = F8;
@@ -1675,6 +1695,28 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
                     break;
+                case 5:
+                    INST_NAME("IMUL EAX, Ed");
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    UFLAG_DF(x2, rex.w ? d_imul64 : d_imul32);
+                    GETSED(0);
+                    if (rex.w) {
+                        if (ed == xRDX)
+                            gd = x3;
+                        else
+                            gd = xRDX;
+                        MULH_D(gd, xRAX, ed);
+                        MUL_D(xRAX, xRAX, ed);
+                        if (gd != xRDX) { MV(xRDX, gd); }
+                    } else {
+                        ADDI_W(x3, xRAX, 0); // sign extend 32bits-> 64bits
+                        MUL_D(xRDX, x3, ed); // 64 <- 32x32
+                        AND(xRAX, xRDX, xMASK);
+                        SRLI_D(xRDX, xRDX, 32);
+                    }
+                    UFLAG_RES(xRAX);
+                    UFLAG_OP1(xRDX);
+                    break;
                 case 6:
                     INST_NAME("DIV Ed");
                     SETFLAGS(X_ALL, SF_SET);
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index d16c8854..a540f714 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -491,6 +491,31 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             if (!rex.w) ZEROUP(gd);
             break;
+        case 0xBF:
+            INST_NAME("MOVSX Gd, Ew");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                EXT_W_H(gd, ed);
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, NULL, 1, 0);
+                LD_H(gd, ed, fixedaddress);
+            }
+            if (!rex.w) ZEROUP(gd);
+            break;
+        case 0xC6:
+            INST_NAME("SHUFPS Gx, Ex, Ib");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(v1, 0, 1);
+            u8 = F8;
+            if (v0 != v1) {
+                VEXTRINS_D(v0, v1, 0x11); // v0[127:64] = v1[127:64]
+            }
+            VSHUF4I_W(v0, v0, u8);
+            break;
         case 0xC8:
         case 0xC9:
         case 0xCA:
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 6190108f..474bbfc2 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -8,6 +8,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -62,6 +63,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q0, 0, 0);
             VILVL_H(v0, q0, v0);
             break;
+        case 0x69:
+            INST_NAME("PUNPCKHWD Gx,Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            VILVH_H(q0, q1, q0);
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -140,6 +148,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             BSTRINS_D(gd, x1, 15, 0);
             break;
+        case 0xD4:
+            INST_NAME("PADDQ Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(q0, 0, 0);
+            VADD_D(v0, v0, q0);
+            break;
         case 0xD6:
             INST_NAME("MOVQ Ex, Gx");
             nextop = F8;
@@ -154,6 +169,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xDB:
+            INST_NAME("PAND Gx,Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(q0, 0, 0);
+            VAND_V(v0, v0, q0);
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index a921643c..7860734a 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -685,6 +685,60 @@ f24-f31  fs0-fs7   Static registers                Callee
 #define LDX_HU(rd, rj, rk) EMIT(type_3R(0b00111000001001000, rk, rj, rd))
 #define LDX_WU(rd, rj, rk) EMIT(type_3R(0b00111000001010000, rk, rj, rd))
 
+// Beware: the positions of rj and rk are swapped in the atomic instruction macros below.
+#define AMCAS_B(rd, rk, rj)     EMIT(type_3R(0b00111000010110000, rk, rj, rd))
+#define AMCAS_H(rd, rk, rj)     EMIT(type_3R(0b00111000010110001, rk, rj, rd))
+#define AMCAS_W(rd, rk, rj)     EMIT(type_3R(0b00111000010110010, rk, rj, rd))
+#define AMCAS_D(rd, rk, rj)     EMIT(type_3R(0b00111000010110011, rk, rj, rd))
+#define AMCAS_DB_B(rd, rk, rj)  EMIT(type_3R(0b00111000010110100, rk, rj, rd))
+#define AMCAS_DB_H(rd, rk, rj)  EMIT(type_3R(0b00111000010110101, rk, rj, rd))
+#define AMCAS_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000010110110, rk, rj, rd))
+#define AMCAS_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000010110111, rk, rj, rd))
+#define AMSWAP_B(rd, rk, rj)    EMIT(type_3R(0b00111000010111000, rk, rj, rd))
+#define AMSWAP_H(rd, rk, rj)    EMIT(type_3R(0b00111000010111001, rk, rj, rd))
+#define AMADD_B(rd, rk, rj)     EMIT(type_3R(0b00111000010111010, rk, rj, rd))
+#define AMADD_H(rd, rk, rj)     EMIT(type_3R(0b00111000010111011, rk, rj, rd))
+#define AMSWAP_DB_B(rd, rk, rj) EMIT(type_3R(0b00111000010111100, rk, rj, rd))
+#define AMSWAP_DB_H(rd, rk, rj) EMIT(type_3R(0b00111000010111101, rk, rj, rd))
+#define AMADD_DB_B(rd, rk, rj)  EMIT(type_3R(0b00111000010111110, rk, rj, rd))
+#define AMADD_DB_H(rd, rk, rj)  EMIT(type_3R(0b00111000010111111, rk, rj, rd))
+#define AMSWAP_W(rd, rk, rj)    EMIT(type_3R(0b00111000011000000, rk, rj, rd))
+#define AMSWAP_D(rd, rk, rj)    EMIT(type_3R(0b00111000011000001, rk, rj, rd))
+#define AMADD_W(rd, rk, rj)     EMIT(type_3R(0b00111000011000010, rk, rj, rd))
+#define AMADD_D(rd, rk, rj)     EMIT(type_3R(0b00111000011000011, rk, rj, rd))
+#define AMAND_W(rd, rk, rj)     EMIT(type_3R(0b00111000011000100, rk, rj, rd))
+#define AMAND_D(rd, rk, rj)     EMIT(type_3R(0b00111000011000101, rk, rj, rd))
+#define AMOR_W(rd, rk, rj)      EMIT(type_3R(0b00111000011000110, rk, rj, rd))
+#define AMOR_D(rd, rk, rj)      EMIT(type_3R(0b00111000011000111, rk, rj, rd))
+#define AMXOR_W(rd, rk, rj)     EMIT(type_3R(0b00111000011001000, rk, rj, rd))
+#define AMXOR_D(rd, rk, rj)     EMIT(type_3R(0b00111000011001001, rk, rj, rd))
+#define AMMAX_W(rd, rk, rj)     EMIT(type_3R(0b00111000011001010, rk, rj, rd))
+#define AMMAX_D(rd, rk, rj)     EMIT(type_3R(0b00111000011001011, rk, rj, rd))
+#define AMMIN_W(rd, rk, rj)     EMIT(type_3R(0b00111000011001100, rk, rj, rd))
+#define AMMIN_D(rd, rk, rj)     EMIT(type_3R(0b00111000011001101, rk, rj, rd))
+#define AMMAX_WU(rd, rk, rj)    EMIT(type_3R(0b00111000011001110, rk, rj, rd))
+#define AMMAX_DU(rd, rk, rj)    EMIT(type_3R(0b00111000011001111, rk, rj, rd))
+#define AMMIN_WU(rd, rk, rj)    EMIT(type_3R(0b00111000011010000, rk, rj, rd))
+#define AMMIN_DU(rd, rk, rj)    EMIT(type_3R(0b00111000011010001, rk, rj, rd))
+#define AMSWAP_DB_W(rd, rk, rj) EMIT(type_3R(0b00111000011010010, rk, rj, rd))
+#define AMSWAP_DB_D(rd, rk, rj) EMIT(type_3R(0b00111000011010011, rk, rj, rd))
+#define AMADD_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000011010100, rk, rj, rd))
+#define AMADD_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000011010101, rk, rj, rd))
+#define AMAND_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000011010110, rk, rj, rd))
+#define AMAND_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000011010111, rk, rj, rd))
+#define AMOR_DB_W(rd, rk, rj)   EMIT(type_3R(0b00111000011011000, rk, rj, rd))
+#define AMOR_DB_D(rd, rk, rj)   EMIT(type_3R(0b00111000011011001, rk, rj, rd))
+#define AMXOR_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000011011010, rk, rj, rd))
+#define AMXOR_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000011011011, rk, rj, rd))
+#define AMMAX_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000011011100, rk, rj, rd))
+#define AMMAX_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000011011101, rk, rj, rd))
+#define AMMIN_DB_W(rd, rk, rj)  EMIT(type_3R(0b00111000011011110, rk, rj, rd))
+#define AMMIN_DB_D(rd, rk, rj)  EMIT(type_3R(0b00111000011011111, rk, rj, rd))
+#define AMMAX_DB_WU(rd, rk, rj) EMIT(type_3R(0b00111000011100000, rk, rj, rd))
+#define AMMAX_DB_DU(rd, rk, rj) EMIT(type_3R(0b00111000011100001, rk, rj, rd))
+#define AMMIN_DB_WU(rd, rk, rj) EMIT(type_3R(0b00111000011100010, rk, rj, rd))
+#define AMMIN_DB_DU(rd, rk, rj) EMIT(type_3R(0b00111000011100011, rk, rj, rd))
+
 #define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd))
 #define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd))
 #define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd))
@@ -801,16 +855,16 @@ LSX instruction starts with V, LASX instruction starts with XV.
 
 */
 
-#define VADD_B(vd, vj, vk)          EMIT(type_3R(0b01110000000010100, vj, vj, vd))
-#define VADD_H(vd, vj, vk)          EMIT(type_3R(0b01110000000010101, vj, vj, vd))
-#define VADD_W(vd, vj, vk)          EMIT(type_3R(0b01110000000010110, vj, vj, vd))
-#define VADD_D(vd, vj, vk)          EMIT(type_3R(0b01110000000010111, vj, vj, vd))
-#define VADD_Q(vd, vj, vk)          EMIT(type_3R(0b01110001001011010, vj, vj, vd))
-#define VSUB_B(vd, vj, vk)          EMIT(type_3R(0b01110000000011000, vj, vj, vd))
-#define VSUB_H(vd, vj, vk)          EMIT(type_3R(0b01110000000011001, vj, vj, vd))
-#define VSUB_W(vd, vj, vk)          EMIT(type_3R(0b01110000000011010, vj, vj, vd))
-#define VSUB_D(vd, vj, vk)          EMIT(type_3R(0b01110000000011011, vj, vj, vd))
-#define VSUB_Q(vd, vj, vk)          EMIT(type_3R(0b01110001001011011, vj, vj, vd))
+#define VADD_B(vd, vj, vk)          EMIT(type_3R(0b01110000000010100, vk, vj, vd))
+#define VADD_H(vd, vj, vk)          EMIT(type_3R(0b01110000000010101, vk, vj, vd))
+#define VADD_W(vd, vj, vk)          EMIT(type_3R(0b01110000000010110, vk, vj, vd))
+#define VADD_D(vd, vj, vk)          EMIT(type_3R(0b01110000000010111, vk, vj, vd))
+#define VADD_Q(vd, vj, vk)          EMIT(type_3R(0b01110001001011010, vk, vj, vd))
+#define VSUB_B(vd, vj, vk)          EMIT(type_3R(0b01110000000011000, vk, vj, vd))
+#define VSUB_H(vd, vj, vk)          EMIT(type_3R(0b01110000000011001, vk, vj, vd))
+#define VSUB_W(vd, vj, vk)          EMIT(type_3R(0b01110000000011010, vk, vj, vd))
+#define VSUB_D(vd, vj, vk)          EMIT(type_3R(0b01110000000011011, vk, vj, vd))
+#define VSUB_Q(vd, vj, vk)          EMIT(type_3R(0b01110001001011011, vk, vj, vd))
 #define VSADD_B(vd, vj, vk)         EMIT(type_3R(0b01110000010001100, vk, vj, vd))
 #define VSADD_H(vd, vj, vk)         EMIT(type_3R(0b01110000010001101, vk, vj, vd))
 #define VSADD_W(vd, vj, vk)         EMIT(type_3R(0b01110000010001110, vk, vj, vd))
@@ -1196,6 +1250,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSHUF_H(vd, vj, vk)         EMIT(type_3R(0b01110001011110101, vk, vj, vd))
 #define VSHUF_W(vd, vj, vk)         EMIT(type_3R(0b01110001011110110, vk, vj, vd))
 #define VSHUF_D(vd, vj, vk)         EMIT(type_3R(0b01110001011110111, vk, vj, vd))
+#define VSHUF4I_B(vd, vj, imm8)     EMIT(type_2RI8(0b01110011100100, imm8, vj, vd))
+#define VSHUF4I_H(vd, vj, imm8)     EMIT(type_2RI8(0b01110011100101, imm8, vj, vd))
+#define VSHUF4I_W(vd, vj, imm8)     EMIT(type_2RI8(0b01110011100110, imm8, vj, vd))
+#define VSHUF4I_D(vd, vj, imm8)     EMIT(type_2RI8(0b01110011100111, imm8, vj, vd))
 #define VEXTRINS_D(vd, vj, imm8)    EMIT(type_2RI8(0b01110011100000, imm8, vj, vd))
 #define VEXTRINS_W(vd, vj, imm8)    EMIT(type_2RI8(0b01110011100001, imm8, vj, vd))
 #define VEXTRINS_H(vd, vj, imm8)    EMIT(type_2RI8(0b01110011100010, imm8, vj, vd))
@@ -1203,7 +1261,6 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VLD(vd, rj, imm12)          EMIT(type_2RI12(0b0010110000, imm12, rj, vd))
 #define VST(vd, rj, imm12)          EMIT(type_2RI12(0b0010110001, imm12, rj, vd))
 
-
 #define XVADD_B(vd, vj, vk)          EMIT(type_3R(0b01110100000010100, vk, vj, vd))
 #define XVADD_H(vd, vj, vk)          EMIT(type_3R(0b01110100000010101, vk, vj, vd))
 #define XVADD_W(vd, vj, vk)          EMIT(type_3R(0b01110100000010110, vk, vj, vd))
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index 04dd5ccd..3d4cf991 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -2123,6 +2123,23 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %s", "VSHUF.D", Vt[Rd], Vt[Rj], Vt[Rk]);
         return buff;
     }
+    if (isMask(opcode, "01110011100100iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.B", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011100101iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.H", Vt[Rd], Vt[Rj], imm);
+
+        return buff;
+    }
+    if (isMask(opcode, "01110011100110iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.W", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
+    if (isMask(opcode, "01110011100111iiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, 0x%x", "VSHUF4I.D", Vt[Rd], Vt[Rj], imm);
+        return buff;
+    }
     if (isMask(opcode, "01110011100000iiiiiiiijjjjjddddd", &a)) {
         snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "VEXTRINS.D", Vt[Rd], Vt[Rj], signExtend(imm, 8));
         return buff;