about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author: Yang Liu <numbksco@gmail.com> 2024-04-15 02:39:31 +0800
committer: GitHub <noreply@github.com> 2024-04-14 20:39:31 +0200
commit: 067cf75c658c632b7c476036e6b21d985cbe67a2 (patch)
tree: d359d5ee81e69b4c221209e194a1f14e261b7775 /src/dynarec
parent: 7f16ddacf7707e8c8b696a62c5c4bd4b4d0695ab (diff)
download: box64-067cf75c658c632b7c476036e6b21d985cbe67a2.tar.gz
          box64-067cf75c658c632b7c476036e6b21d985cbe67a2.zip
[LA64_DYNAREC] Added more opcodes (#1447)
* Added B4..B7 MOV opcode

* Added 66 81,83 /5 SUB opcode

* Added 30 XOR opcode

* Added F0 01 LOCK ADD opcode

* Added 66 81,83 /0 ADD opcode

* Added 66 39 CMP opcode

* Added 66 0F D6 MOVQ opcode

* Added 0F 57 XORPS opcode

* Fix

* Added 66 0F 61 PUNPCKLWD opcode
Diffstat (limited to 'src/dynarec')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 24
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 15
-rw-r--r-- src/dynarec/la64/dynarec_la64_66.c 40
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 21
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_logic.c 36
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_math.c 116
-rw-r--r-- src/dynarec/la64/dynarec_la64_f0.c 22
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 6
-rw-r--r-- src/dynarec/la64/la64_printer.c 8
9 files changed, 284 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 72688d4f..ebb6f4f8 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -274,6 +274,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             i64 = F32S;
             emit_sub32c(dyn, ninst, rex, xRAX, i64, x2, x3, x4, x5);
             break;
+        case 0x30:
+            INST_NAME("XOR Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_xor8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK();
+            break;
         case 0x31:
             INST_NAME("XOR Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -1002,6 +1011,21 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 gb1 = TO_LA64(opcode & 3);
             BSTRINS_D(gb1, x1, 7, 0);
             break;
+        case 0xB4:
+        case 0xB5:
+        case 0xB6:
+        case 0xB7:
+            INST_NAME("MOV xH, Ib");
+            u8 = F8;
+            MOV32w(x1, u8);
+            if (rex.rex) {
+                gb1 = TO_LA64((opcode & 7) + (rex.b << 3));
+                BSTRINS_D(gb1, x1, 7, 0);
+            } else {
+                gb1 = TO_LA64(opcode & 3);
+                BSTRINS_D(gb1, x1, 15, 8);
+            }
+            break;
         case 0xB8:
         case 0xB9:
         case 0xBA:
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index d7184f2c..d16c8854 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -184,6 +184,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
         #undef GO
 
+        case 0x57:
+            INST_NAME("XORPS Gx, Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) {
+                // special case for XORPS Gx, Gx
+                q0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VXOR_V(q0, q0, q0);
+            } else {
+                q0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                GETEX(q1, 0, 0);
+                VXOR_V(q0, q0, q1);
+            }
+            break;
+
         #define GO(GETFLAGS, NO, YES, F, I)                                                         \
             READFLAGS(F);                                                                           \
             i32_ = F32S;                                                                            \
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index e7a358eb..626e33d7 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -65,10 +65,50 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x39:
+            INST_NAME("CMP Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
+            break;
         case 0x81:
         case 0x83:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
+                case 0: // ADD
+                    if (opcode == 0x81) {
+                        INST_NAME("ADD Ew, Iw");
+                    } else {
+                        INST_NAME("ADD Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, (opcode == 0x81) ? 2 : 1);
+                    if (opcode == 0x81)
+                        i16 = F16S;
+                    else
+                        i16 = F8S;
+                    MOV64x(x5, i16);
+                    emit_add16(dyn, ninst, ed, x5, x2, x4, x6);
+                    EWBACK;
+                    break;
+                case 5: // SUB
+                    if (opcode == 0x81) {
+                        INST_NAME("SUB Ew, Iw");
+                    } else {
+                        INST_NAME("SUB Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, (opcode == 0x81) ? 2 : 1);
+                    if (opcode == 0x81)
+                        i16 = F16S;
+                    else
+                        i16 = F8S;
+                    MOV32w(x5, i16);
+                    emit_sub16(dyn, ninst, x1, x5, x2, x4, x6);
+                    EWBACK;
+                    break;
                 case 7: // CMP
                     if (opcode == 0x81) {
                         INST_NAME("CMP Ew, Iw");
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index cdafe2e1..6190108f 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -55,6 +55,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             FAKEED;
             break;
+        case 0x61:
+            INST_NAME("PUNPCKLWD Gx,Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEX(q0, 0, 0);
+            VILVL_H(v0, q0, v0);
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -133,6 +140,20 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             BSTRINS_D(gd, x1, 15, 0);
             break;
+        case 0xD6:
+            INST_NAME("MOVQ Ex, Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if (MODREG) {
+                v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop & 7) + (rex.b << 3));
+                VXOR_V(v1, v1, v1);
+                VEXTRINS_D(v1, v0, 0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                FST_D(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c
index d3affee6..dc6995e4 100644
--- a/src/dynarec/la64/dynarec_la64_emit_logic.c
+++ b/src/dynarec/la64/dynarec_la64_emit_logic.c
@@ -21,6 +21,42 @@
 #include "dynarec_la64_functions.h"
 #include "dynarec_la64_helper.h"
 
+// emit XOR8 instruction, from s1, s2, store result (masked to 8 bits) in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed) -- NOTE(review): SET_DF(s4, ...) is emitted before the XOR reads s2, so that aliasing looks unsafe when X_PEND is requested; confirm
+void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX (X_PEND) {
+        SET_DF(s4, d_xor8); // deferred-flags path: tag the operation as d_xor8, s4 is handed over as scratch
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFXA (X_ALL, la64_lbt) { // presumably: only when flags are wanted AND la64_lbt is set -- TODO confirm IFXA semantics
+        X64_XOR_B(s1, s2); // byte-sized flag helper, emitted before s1 is overwritten below
+    }
+
+    XOR(s1, s1, s2);
+    ANDI(s1, s1, 0xff); // keep only the low 8 bits of the result
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res)); // byte store of the result into the emu structure's res field
+    }
+
+    if (la64_lbt) return; // the manual flag computation below is skipped when la64_lbt is set
+
+    CLEAR_FLAGS(s3);
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 7); // s3 = bit 7 of the 8-bit result (its sign bit)
+        BEQZ(s3, 8); // presumably skips the following ORI when the sign bit is clear
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // presumably skips the following ORI when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4); // parity computed from the result, s3/s4 passed as scratch
+    }
+}
 
 // emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index 4c5f194f..1884ea9e 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -399,6 +399,77 @@ void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     }
 }
 
+// emit ADD16 instruction, from s1, s2, store result (truncated to 16 bits) in s1 using s3, s4 and s5 as scratch
+void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1)); // deferred-flags path: record both 16-bit operands
+        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_add16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+    IFXA (X_AF | X_OF, !la64_lbt) { // operands must be captured before s1 is overwritten by the add
+        OR(s3, s1, s2);  // s3 = op1 | op2
+        AND(s4, s1, s2); // s4 = op1 & op2
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_ADD_H(s1, s2); // 16-bit flag helper (was X64_ADD_DU); matches X64_SUB_H used by emit_sub16
+    }
+
+    ADD_D(s1, s1, s2); // full-width add: bits above 15 are kept for the carry test below
+
+    IFX (X_PEND) {
+        ST_W(s1, xEmu, offsetof(x64emu_t, res)); // word store, so the stored result still contains bit 16
+    }
+
+    if (la64_lbt) {
+        BSTRPICK_D(s1, s1, 15, 0); // truncate the result to 16 bits; manual flag computation is skipped
+        return;
+    }
+
+    CLEAR_FLAGS(s5);
+    IFX (X_AF | X_OF) {
+        ANDN(s3, s3, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX (X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8); // presumably skips the following ORI when the bit is clear
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, 14); // bring bits 14 and 15 of cc down to bits 0 and 1
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+
+    IFX (X_CF) {
+        SRLI_D(s3, s1, 16); // CF: any bit at or above bit 16 of the untruncated sum
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+
+    BSTRPICK_D(s1, s1, 15, 0); // truncate the result to 16 bits before the ZF/SF/PF tests
+
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 15); // s3 = bit 15 of the 16-bit result (its sign bit)
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
@@ -454,6 +525,51 @@ void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
     emit_sub8(dyn, ninst, s1, s2, s3, s4, s5);
 }
 
+// emit SUB16 instruction, from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1)); // deferred-flags path: record both 16-bit operands
+        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sub16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFXA (X_AF | X_CF | X_OF, !la64_lbt) {
+        // for later flag calculation
+        NOR(s5, xZR, s1); // s5 = ~s1 (NOR against xZR), captured before s1 is overwritten
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_SUB_H(s1, s2); // halfword-sized flag helper, emitted before s1 is overwritten below
+    }
+
+    SUB_W(s1, s1, s2);
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res)); // halfword store of the result
+    }
+
+    if (la64_lbt) return; // NOTE(review): on this path s1 is returned without the 16-bit truncation done below -- confirm callers only consume the low 16 bits
+
+    CLEAR_FLAGS(s3);
+    SLLI_D(s1, s1, 48); // move bit 15 of the result up to bit 63 so a signed compare sees the 16-bit sign
+    IFX (X_SF) {
+        BGE(s1, xZR, 8); // presumably skips the following ORI when the shifted value is non-negative
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_D(s1, s1, 48); // shift back: s1 now holds the result zero-extended from 16 bits
+
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16); // receives ~op1 (s5), op2, the result, two scratch registers and the width 16
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // presumably skips the following ORI when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SUB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index 64c806f9..12616391 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -52,6 +52,28 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     GETREX();
 
     switch (opcode) {
+        case 0x01:
+            INST_NAME("LOCK ADD Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            SMDMB();
+            if ((nextop & 0xC0) == 0xC0) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                emit_add32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                MARKLOCK;
+                LLxw(x1, wback, 0);
+                ADDxw(x4, x1, gd);
+                SCxw(x4, wback, 0);
+                BEQZ_MARKLOCK(x4);
+                IFX (X_ALL | X_PEND) {
+                    emit_add32(dyn, ninst, rex, x1, gd, x3, x4, x5);
+                }
+            }
+            SMDMB();
+            break;
         case 0x0F:
             nextop = F8;
             switch (nextop) {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 2ec42225..d6b9629a 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -693,6 +693,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_add32c         STEPNAME(emit_add32c)
 #define emit_add8           STEPNAME(emit_add8)
 #define emit_add8c          STEPNAME(emit_add8c)
+#define emit_add16          STEPNAME(emit_add16)
+#define emit_sub16          STEPNAME(emit_sub16)
 #define emit_sub32          STEPNAME(emit_sub32)
 #define emit_sub32c         STEPNAME(emit_sub32c)
 #define emit_sub8           STEPNAME(emit_sub8)
@@ -705,6 +707,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_or32c          STEPNAME(emit_or32c)
 #define emit_or8            STEPNAME(emit_or8)
 #define emit_or8c           STEPNAME(emit_or8c)
+#define emit_xor8           STEPNAME(emit_xor8)
 #define emit_xor8c          STEPNAME(emit_xor8c)
 #define emit_xor32          STEPNAME(emit_xor32)
 #define emit_xor32c         STEPNAME(emit_xor32c)
@@ -768,6 +771,8 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
+void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -780,6 +785,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
+void emit_xor8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
diff --git a/src/dynarec/la64/la64_printer.c b/src/dynarec/la64/la64_printer.c
index eccf62ff..04dd5ccd 100644
--- a/src/dynarec/la64/la64_printer.c
+++ b/src/dynarec/la64/la64_printer.c
@@ -564,19 +564,19 @@ const char* la64_print(uint32_t opcode, uintptr_t addr)
         return buff;
     }
     if (isMask(opcode, "0010101110iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.D", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.D", Ft[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     if (isMask(opcode, "0010101100iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.S", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FLD.S", Ft[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     if (isMask(opcode, "0010101111iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.D", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.D", Ft[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     if (isMask(opcode, "0010101101iiiiiiiiiiiijjjjjddddd", &a)) {
-        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.S", Xt[Rd], Xt[Rj], signExtend(imm, 12));
+        snprintf(buff, sizeof(buff), "%-15s %s, %s, %d", "FST.S", Ft[Rd], Xt[Rj], signExtend(imm, 12));
         return buff;
     }
     if (isMask(opcode, "00000001000000001kkkkkjjjjjddddd", &a)) {