about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorYang Liu <numbksco@gmail.com>2024-03-05 23:29:52 +0800
committerGitHub <noreply@github.com>2024-03-05 16:29:52 +0100
commitc3f97dd73053e2c5341785f80c345ca4f972d927 (patch)
tree7896bdf7e0b4a126b4524ebe2aff27a8c8a13ddb /src
parentb1ebf9dc79e55c5415e3a1320b04f410a8fdc15c (diff)
downloadbox64-c3f97dd73053e2c5341785f80c345ca4f972d927.tar.gz
box64-c3f97dd73053e2c5341785f80c345ca4f972d927.zip
[LA64_DYNAREC] Added more opcodes and some fixes too (#1331)
* [LA64_DYNAREC] Added 66 90..97 NOP/XCHG opcodes

* Added AB REP STOSD opcode

* Fixed a bug in emit_sar32c

* [LA64_DYNAREC] Added 0F B7 MOVZX opcode

* Why I keep forgetting the TO_LA64 thing?!

* Added 20..24 AND opcodes
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_00.c68
-rw-r--r--src/dynarec/la64/dynarec_la64_0f.c14
-rw-r--r--src/dynarec/la64/dynarec_la64_66.c34
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_logic.c80
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_shift.c1
-rw-r--r--src/dynarec/la64/dynarec_la64_f0.c2
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h31
7 files changed, 226 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 53875d29..2417ad18 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -129,6 +129,49 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x20:
+            INST_NAME("AND Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_and8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x4, 0);
+            break;
+        case 0x21:
+            INST_NAME("AND Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_and32(dyn, ninst, rex, ed, gd, x3, x4);
+            WBACK;
+            break;
+        case 0x22:
+            INST_NAME("AND Gb, Eb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_and8(dyn, ninst, x2, x1, x4, x5);
+            GBBACK(x5);
+            break;
+        case 0x23:
+            INST_NAME("AND Gd, Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_and32(dyn, ninst, rex, gd, ed, x3, x4);
+            break;
+        case 0x24:
+            INST_NAME("AND AL, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u8 = F8;
+            ANDI(x1, xRAX, 0xff);
+            emit_and8c(dyn, ninst, x1, u8, x3, x4);
+            BSTRINS_D(xRAX, x1, 7, 0);
+            break;
         case 0x25:
             INST_NAME("AND EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -518,6 +561,31 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MOV64xw(x2, i64);
             emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5);
             break;
+        case 0xAB:
+            if (rep) {
+                INST_NAME("REP STOSD");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1 << F_DF);
+                BNEZ_MARK2(x1);
+                MARK; // Part with DF==0
+                SDxw(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, rex.w ? 8 : 4);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2; // Part with DF==1
+                SDxw(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, rex.w ? -8 : -4);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("STOSD");
+                GETDIR(x3, x1, rex.w ? 8 : 4);
+                SDxw(xRAX, xRDI, 0);
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
         case 0xB8:
         case 0xB9:
         case 0xBA:
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index d3e7821e..6e90d230 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -186,7 +186,19 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
             LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
             break;
-
+        case 0xB7:
+            INST_NAME("MOVZX Gd, Ew");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                BSTRPICK_D(gd, ed, 15, 0);
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                LD_HU(gd, ed, fixedaddress);
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 7626102c..7f3cd96c 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -65,6 +65,40 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x89:
+            INST_NAME("MOV Ew, Gw");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                if (ed != gd) {
+                    // 16-bit register write: replace bits 15..0 only, bits 63..16 of ed must stay untouched
+                    BSTRINS_D(ed, gd, 15, 0);
+                }
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                ST_H(gd, ed, fixedaddress); // memory form: halfword store of gd
+                SMWRITELOCK(lock);
+            }
+            break;
+        case 0x90:
+        case 0x91:
+        case 0x92:
+        case 0x93:
+        case 0x94:
+        case 0x95:
+        case 0x96:
+        case 0x97:
+            gd = TO_LA64((opcode & 0x07) + (rex.b << 3));
+            if (gd == xRAX) {
+                INST_NAME("NOP");
+            } else {
+                INST_NAME("XCHG AX, Reg");
+                MV(x2, xRAX);               // keep a copy of the old RAX (only its low 16 bits are used below)
+                BSTRINS_D(xRAX, gd, 15, 0); // AX <- low 16 bits of Reg, bits 63..16 of RAX preserved
+                BSTRINS_D(gd, x2, 15, 0);   // low 16 bits of Reg <- old AX, bits 63..16 of Reg preserved
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c
index 423b885e..2aa4315d 100644
--- a/src/dynarec/la64/dynarec_la64_emit_logic.c
+++ b/src/dynarec/la64/dynarec_la64_emit_logic.c
@@ -72,6 +72,43 @@ void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 }
 
+// emit AND8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
+void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX(X_PEND) {
+        SET_DF(s3, d_and8);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFXA(X_ALL, la64_lbt) {
+        X64_AND_B(s1, s2);
+    }
+
+    AND(s1, s1, s2); // res = s1 & s2
+
+    IFX(X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) return; // LBT path stops here; presumably X64_AND_B above already produced the flags - TODO confirm
+
+    CLEAR_FLAGS(s3);
+    IFX(X_SF) {
+        SRLI_D(s3, s1, 7); // s3 = s1 >> 7, i.e. the sign bit when s1 holds an 8-bit value
+        BEQZ(s3, 8); // s3 == 0: branch 8 bytes ahead, over the ORI
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8); // result != 0: branch 8 bytes ahead, over the ORI
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+
 // emit AND8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
 {
@@ -110,6 +147,49 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
     }
 }
 
+
+// emit AND32 instruction (64-bit variant when rex.w is set), from s1, s2, store result in s1 using s3 and s4 as scratch
+void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    IFX(X_PEND) {
+        SET_DF(s3, rex.w ? d_tst64 : d_tst32);
+    } else IFX(X_ALL) {
+        SET_DFNONE();
+    }
+
+
+    IFXA(X_ALL, la64_lbt) {
+        if (rex.w)
+            X64_AND_D(s1, s2);
+        else
+            X64_AND_W(s1, s2);
+    }
+
+    AND(s1, s1, s2); // res = s1 & s2
+    if (!rex.w) ZEROUP(s1); // 32-bit operation; ZEROUP presumably clears bits 63..32 of s1 - macro not visible here
+
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) return; // LBT path stops here; presumably X64_AND_D/W above already produced the flags - TODO confirm
+
+    CLEAR_FLAGS(s3);
+    IFX(X_SF) {
+        SRLI_D(s3, s1, rex.w ? 63 : 31); // shift the top bit of the operand size (bit 63 or bit 31) down to bit 0
+        BEQZ(s3, 8); // s3 == 0: branch 8 bytes ahead, over the ORI
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX(X_ZF) {
+        BNEZ(s1, 8); // result != 0: branch 8 bytes ahead, over the ORI
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF)
+    {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit AND32 instruction, from s1, c, store result in s1 using s3 and s4 as scratch
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4)
 {
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index fe461cdb..b2534c60 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -200,6 +200,7 @@ void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         }
 
         SRAIxw(s1, s1, c);
+        if (!rex.w) ZEROUP(s1);
 
         IFX(X_PEND) {
             SDxw(s1, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/la64/dynarec_la64_f0.c b/src/dynarec/la64/dynarec_la64_f0.c
index 346badc1..adf6ce8f 100644
--- a/src/dynarec/la64/dynarec_la64_f0.c
+++ b/src/dynarec/la64/dynarec_la64_f0.c
@@ -64,7 +64,7 @@ uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             GETGD;
                             SMDMB();
                             if (MODREG) {
-                                ed = xRAX + (nextop & 7) + (rex.b << 3);
+                                ed = TO_LA64((nextop & 7) + (rex.b << 3));
                                 MVxw(x1, ed);
                                 MVxw(ed, gd);
                                 MVxw(gd, x1);
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d1e00434..fd07111b 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -170,6 +170,15 @@
         BSTRINS_D(wback, ed, wb2 + 7, wb2); \
     }
 
+// Get direction with size Z, based on the F_DF flag, in register r ready for load/store fetching
+// (r = Z when F_DF is clear, r = -Z when it is set), using s as scratch.
+// F_DF is not in LBT4.eflags, don't worry
+#define GETDIR(r, s, Z)                                          \
+    MOV32w(r, Z);                                                \
+    ANDI(s, xFlags, 1 << F_DF); /* mask=1<<10 */                 \
+    BEQZ(s, 4 + 4); /* F_DF clear: branch over the SUB_D */      \
+    SUB_D(r, xZR, r);
+
 // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
 #define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)
@@ -211,13 +220,24 @@
 
 // Branch to MARK if reg1!=reg2 (use j64)
 #define BNE_MARK(reg1, reg2) Bxx_gen(NE, MARK, reg1, reg2)
+// Branch to MARK2 if reg1!=reg2 (use j64)
+#define BNE_MARK2(reg1, reg2) Bxx_gen(NE, MARK2, reg1, reg2)
+// Branch to MARK3 if reg1!=reg2 (use j64)
+#define BNE_MARK3(reg1, reg2) Bxx_gen(NE, MARK3, reg1, reg2)
+// Branch to MARKLOCK if reg1!=reg2 (use j64)
+#define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2)
 
 // Branch to MARKLOCK if reg1==reg2 (use j64)
 #define BEQ_MARKLOCK(reg1, reg2) Bxx_gen(EQ, MARKLOCK, reg1, reg2)
 // Branch to MARKLOCK if reg1==0 (use j64)
 #define BEQZ_MARKLOCK(reg) BxxZ_gen(EQ, MARKLOCK, reg)
-// Branch to MARKLOCK if reg1!=reg2 (use j64)
-#define BNE_MARKLOCK(reg1, reg2) Bxx_gen(NE, MARKLOCK, reg1, reg2)
+
+// Branch to MARK if reg1!=0 (use j64)
+#define BNEZ_MARK(reg) BxxZ_gen(NE, MARK, reg)
+// Branch to MARK2 if reg1!=0 (use j64)
+#define BNEZ_MARK2(reg) BxxZ_gen(NE, MARK2, reg)
+// Branch to MARK3 if reg1!=0 (use j64)
+#define BNEZ_MARK3(reg) BxxZ_gen(NE, MARK3, reg)
 // Branch to MARKLOCK if reg1!=0 (use j64)
 #define BNEZ_MARKLOCK(reg) BxxZ_gen(NE, MARKLOCK, reg)
 
@@ -229,6 +249,9 @@
 #define CBNZ_NEXT(reg1)                                                       \
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
     BNEZ(reg1, j64)
+#define B_NEXT_nocond                                                         \
+    j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
+    B(j64)
 
 #define IFX(A)      if ((dyn->insts[ninst].x64.gen_flags & (A)))
 #define IFXA(A, B)  if ((dyn->insts[ninst].x64.gen_flags & (A)) && (B))
@@ -467,7 +490,9 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_or32           STEPNAME(emit_or32)
 #define emit_or32c          STEPNAME(emit_or32c)
 #define emit_xor32          STEPNAME(emit_xor32)
+#define emit_and8           STEPNAME(emit_and8)
 #define emit_and8c          STEPNAME(emit_and8c)
+#define emit_and32          STEPNAME(emit_and32)
 #define emit_and32c         STEPNAME(emit_and32c)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shr32c         STEPNAME(emit_shr32c)
@@ -519,7 +544,9 @@ void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);