about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorYang Liu <numbksco@gmail.com>2024-04-05 23:56:24 +0800
committerGitHub <noreply@github.com>2024-04-05 17:56:24 +0200
commit6c5d35df6b79c823640874229fcfab93ea3eb45e (patch)
treec6270e3c4250162e9915cfe217c8236ff03bc921 /src
parent4d260217054f16e4efc31c9bef7974898f43252e (diff)
downloadbox64-6c5d35df6b79c823640874229fcfab93ea3eb45e.tar.gz
box64-6c5d35df6b79c823640874229fcfab93ea3eb45e.zip
[LA64_DYNAREC] Added more opcodes (#1417)
* Added 64 8B MOV opcode

* Added 64 33 XOR opcode

* Added 19 SBB opcode

* Smol fix
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/la64/dynarec_la64_00.c16
-rw-r--r--src/dynarec/la64/dynarec_la64_64.c90
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_math.c74
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.c26
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h27
-rw-r--r--src/dynarec/la64/la64_emitter.h34
6 files changed, 265 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 56765fe1..d0cf7328 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -141,6 +141,16 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x19:
+            INST_NAME("SBB Ed, Gd");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
         case 0x20:
             INST_NAME("AND Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -336,6 +346,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
+        case 0x64:
+            addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _FS, ok, need_epilog);
+            break;
+        case 0x65:
+            addr = dynarec64_64(dyn, addr, ip, ninst, rex, rep, _GS, ok, need_epilog);
+            break;
         case 0x66:
             addr = dynarec64_66(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);
             break;
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c
new file mode 100644
index 00000000..9dd03c01
--- /dev/null
+++ b/src/dynarec/la64/dynarec_la64_64.c
@@ -0,0 +1,90 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "custommem.h"
+
+#include "la64_printer.h"
+#include "dynarec_la64_private.h"
+#include "dynarec_la64_helper.h"
+#include "dynarec_la64_functions.h"
+
+uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog)
+{ // Translates the opcode that follows a segment-override prefix; seg is the segment index the caller passes (_FS for 0x64, _GS for 0x65).
+    (void)ip;
+    (void)rep;
+    (void)need_epilog;
+
+    uint8_t opcode = F8;
+    uint8_t nextop;
+    uint8_t u8;
+    uint8_t gd, ed, eb1, eb2, gb1, gb2;
+    uint8_t gback, wback, wb1, wb2, wb;
+    int64_t i64, j64;
+    uint64_t u64;
+    int v0, v1;
+    int q0;
+    int d0;
+    int64_t fixedaddress, gdoffset;
+    int unscaled;
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(wb1);
+    MAYUSE(wb2);
+    MAYUSE(gb1);
+    MAYUSE(gb2);
+    MAYUSE(j64);
+    MAYUSE(d0);
+    MAYUSE(q0);
+    MAYUSE(v0);
+    MAYUSE(v1);
+
+    while ((opcode == 0xF2) || (opcode == 0xF3)) { // skip 0xF2/0xF3 prefix bytes, remembering the last one
+        rep = opcode - 0xF1; // 1 for 0xF2, 2 for 0xF3
+        opcode = F8;
+    }
+
+    GETREX();
+
+    switch (opcode) {
+        case 0x33:
+            INST_NAME("XOR Gd, Seg:Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            grab_segdata(dyn, addr, ninst, x4, seg); // x4 receives the segment offset
+            nextop = F8;
+            GETGD;
+            GETEDO(x4, 0); // memory operand is loaded with x4 as the index register
+            emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
+            break;
+        case 0x8B:
+            INST_NAME("MOV Gd, Seg:Ed");
+            grab_segdata(dyn, addr, ninst, x4, seg); // x4 receives the segment offset
+            nextop = F8;
+            GETGD;
+            if (MODREG) { // reg <= reg
+                MVxw(gd, TO_LA64((nextop & 7) + (rex.b << 3)));
+            } else { // reg <= mem
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                ADD_D(x4, ed, x4); // add the segment offset to the decoded address
+                LDxw(gd, x4, fixedaddress);
+            }
+            break;
+        default:
+            DEFAULT;
+    }
+    return addr;
+}
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index f47eb082..ce928edd 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -155,7 +155,10 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 
     if (la64_lbt) {
         IFX(X_ALL) {
-            X64_ADD_WU(s1, s2);
+            if (rex.w)
+                X64_ADD_DU(s1, s2);
+            else
+                X64_ADD_WU(s1, s2);
         }
         ADDxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -536,7 +539,11 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     if (la64_lbt) {
         IFX(X_PEND) {} else {MOV64xw(s2, c);}
         IFX(X_ALL) {
-            X64_SUB_WU(s1, s2);
+            if (rex.w) { // 64-bit operands need the _DU flag helper, as emit_add32c does
+                X64_SUB_DU(s1, s2);
+            } else {
+                X64_SUB_WU(s1, s2);
+            }
         }
         SUBxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -585,6 +592,69 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 }
 
 
+// emit SBB32 instruction: s1 = s1 - s2 - carry bit from xFlags (64-bit when rex.w, else 32-bit); result in s1, using s3, s4 and s5 as scratch
+void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX (X_PEND) { // deferred flags: record both operands and the operation kind
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w ? d_sbb64 : d_sbb32);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        if (rex.w) { // rex.w selects the 64-bit (_D) form, matching d_sbb64 above
+            SBC_D(s3, s1, s2);
+        } else {
+            SBC_W(s3, s1, s2);
+        }
+        IFX (X_ALL) { // flag helper must use the same width as the subtraction
+            if (rex.w)
+                X64_SBC_D(s1, s2);
+            else
+                X64_SBC_W(s1, s2);
+        }
+        MVxw(s1, s3); // result was built in s3 so s1/s2 stayed intact for the flag helper
+
+        IFX (X_PEND)
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        return;
+    }
+
+    IFX (X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOR(s5, xZR, s1);
+    }
+
+    SUBxw(s1, s1, s2);
+    ANDI(s3, xFlags, 1 << F_CF); // read the incoming carry before the flags are cleared below
+    SUBxw(s1, s1, s3);
+
+    CLEAR_FLAGS(s3);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8); // skip the ORI when the result is not negative
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w ? 64 : 32);
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // skip the ORI when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+
 // emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 7d8b9961..6c5b76b5 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -506,6 +506,32 @@ void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
     dyn->last_ip = 0;
 }
 
+void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int segment)
+{ // Emits code that loads segs_offs[segment] into reg and, when the serial check fails, falls through to a call_c of GetSegmentBaseEmu.
+    (void)addr;
+    int64_t j64;
+    MAYUSE(j64);
+    MESSAGE(LOG_DUMP, "Get %s Offset\n", (segment == _FS) ? "FS" : "GS");
+    int t1 = x1, t2 = x4;
+    if (reg == t1) ++t1; // keep the scratch registers distinct from reg
+    if (reg == t2) ++t2;
+    LD_WU(t2, xEmu, offsetof(x64emu_t, segs_serial[segment]));
+    LD_D(reg, xEmu, offsetof(x64emu_t, segs_offs[segment]));
+    if (segment == _GS) {
+        CBNZ_MARKSEG(t2); // fast check
+    } else {
+        LD_D(t1, xEmu, offsetof(x64emu_t, context));
+        LD_WU(t1, t1, offsetof(box64context_t, sel_serial));
+        SUB_W(t1, t1, t2); // zero when the context serial equals the stored segment serial
+        CBZ_MARKSEG(t1);
+    }
+    MOV64x(x1, segment); // NOTE(review): presumably the segment argument for GetSegmentBaseEmu - confirm against call_c
+    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags);
+    MARKSEG;
+    MESSAGE(LOG_DUMP, "----%s Offset\n", (segment == _FS) ? "FS" : "GS");
+}
+
+
 void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
 {
     // TODO
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d3acc05f..3d91d68e 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -122,6 +122,18 @@
 // GETEW will use i for ed, and can use r3 for wback.
 #define GETEW(i, D) GETEWW(x3, i, D)
 
+// GETEDO: memory operand is loaded from wback indexed by register O; can use x1 for ed and x2 for wback. wback is 0 if ed is a register (mapped with TO_LA64)
+#define GETEDO(O, D)                                                                            \
+    if (MODREG) {                                                                               \
+        ed = TO_LA64((nextop & 7) + (rex.b << 3));                                              \
+        wback = 0;                                                                              \
+    } else {                                                                                    \
+        SMREAD();                                                                               \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, D); \
+        LDXxw(x1, wback, O);                                                                    \
+        ed = x1;                                                                                \
+    }
+
 // FAKEED like GETED, but doesn't get anything
 #define FAKEED                                   \
     if (!MODREG) {                               \
@@ -304,6 +316,15 @@
     j64 = (dyn->insts) ? (dyn->insts[ninst].epilog - (dyn->native_size)) : 0; \
     B(j64)
 
+// Branch to MARKSEG if reg is 0 (overwrites j64 with the offset from the current native_size to GETMARKSEG)
+#define CBZ_MARKSEG(reg)                   \
+    j64 = GETMARKSEG - (dyn->native_size); \
+    BEQZ(reg, j64);
+// Branch to MARKSEG if reg is not 0 (overwrites j64 with the offset from the current native_size to GETMARKSEG)
+#define CBNZ_MARKSEG(reg)                  \
+    j64 = GETMARKSEG - (dyn->native_size); \
+    BNEZ(reg, j64);
+
 #define IFX(A)      if ((dyn->insts[ninst].x64.gen_flags & (A)))
 #define IFXA(A, B)  if ((dyn->insts[ninst].x64.gen_flags & (A)) && (B))
 #define IFX_PENDOR0 if ((dyn->insts[ninst].x64.gen_flags & (X_PEND) || !dyn->insts[ninst].x64.gen_flags))
@@ -525,6 +546,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define dynarec64_00   STEPNAME(dynarec64_00)
 #define dynarec64_0F   STEPNAME(dynarec64_0F)
+#define dynarec64_64   STEPNAME(dynarec64_64)
 #define dynarec64_66   STEPNAME(dynarec64_66)
 #define dynarec64_F30F STEPNAME(dynarec64_F30F)
 #define dynarec64_660F STEPNAME(dynarec64_660F)
@@ -538,6 +560,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define jump_to_next        STEPNAME(jump_to_next)
 #define ret_to_epilog       STEPNAME(ret_to_epilog)
 #define call_c              STEPNAME(call_c)
+#define grab_segdata        STEPNAME(grab_segdata)
 #define emit_cmp16          STEPNAME(emit_cmp16)
 #define emit_cmp16_0        STEPNAME(emit_cmp16_0)
 #define emit_cmp32          STEPNAME(emit_cmp32)
@@ -555,6 +578,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sub32c         STEPNAME(emit_sub32c)
 #define emit_sub8           STEPNAME(emit_sub8)
 #define emit_sub8c          STEPNAME(emit_sub8c)
+#define emit_sbb32          STEPNAME(emit_sbb32)
 #define emit_neg32          STEPNAME(emit_neg32)
 #define emit_or32           STEPNAME(emit_or32)
 #define emit_or32c          STEPNAME(emit_or32c)
@@ -599,6 +623,7 @@ void jump_to_epilog_fast(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_la64_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
 void ret_to_epilog(dynarec_la64_t* dyn, int ninst, rex_t rex);
 void call_c(dynarec_la64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+void grab_segdata(dynarec_la64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 void emit_cmp8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_cmp32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
@@ -616,6 +641,7 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
+void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
@@ -666,6 +692,7 @@ void CacheTransform(dynarec_la64_t* dyn, int ninst, int cacheupd, int s1, int s2
 uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog);
 uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
 uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
 uintptr_t dynarec64_F0(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 291b28da..beca7c1b 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -594,6 +594,18 @@ f24-f31  fs0-fs7   Static registers                Callee
 // MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD)
 #define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd))
 
+#define LDX_B(rd, rj, rk)  EMIT(type_3R(0b00111000000000000, rk, rj, rd)) // LDX_*/STX_*: three-register forms, rk is a register operand rather than an imm12
+#define LDX_H(rd, rj, rk)  EMIT(type_3R(0b00111000000001000, rk, rj, rd))
+#define LDX_W(rd, rj, rk)  EMIT(type_3R(0b00111000000010000, rk, rj, rd))
+#define LDX_D(rd, rj, rk)  EMIT(type_3R(0b00111000000011000, rk, rj, rd))
+#define STX_B(rd, rj, rk)  EMIT(type_3R(0b00111000000100000, rk, rj, rd))
+#define STX_H(rd, rj, rk)  EMIT(type_3R(0b00111000000101000, rk, rj, rd))
+#define STX_W(rd, rj, rk)  EMIT(type_3R(0b00111000000110000, rk, rj, rd))
+#define STX_D(rd, rj, rk)  EMIT(type_3R(0b00111000000111000, rk, rj, rd))
+#define LDX_BU(rd, rj, rk) EMIT(type_3R(0b00111000001000000, rk, rj, rd))
+#define LDX_HU(rd, rj, rk) EMIT(type_3R(0b00111000001001000, rk, rj, rd))
+#define LDX_WU(rd, rj, rk) EMIT(type_3R(0b00111000001010000, rk, rj, rd))
+
 #define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd))
 #define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd))
 #define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd))
@@ -1631,6 +1643,20 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define X64_ROTLI_D(rj, imm6)   EMIT(type_2RI6(0x55, imm6, rj, 0x17))
 #define X64_RCLI_D(rj, imm6)    EMIT(type_2RI6(0x55, imm6, rj, 0x1b))
 
+// Warning: these are LBT add-ons that use LBT4.eflags internally
+#define ADC_B(rd, rj, rk) EMIT(type_3R(0x60, rk, rj, rd))
+#define ADC_H(rd, rj, rk) EMIT(type_3R(0x61, rk, rj, rd))
+#define ADC_W(rd, rj, rk) EMIT(type_3R(0x62, rk, rj, rd))
+#define ADC_D(rd, rj, rk) EMIT(type_3R(0x63, rk, rj, rd))
+#define SBC_B(rd, rj, rk) EMIT(type_3R(0x64, rk, rj, rd))
+#define SBC_H(rd, rj, rk) EMIT(type_3R(0x65, rk, rj, rd))
+#define SBC_W(rd, rj, rk) EMIT(type_3R(0x66, rk, rj, rd))
+#define SBC_D(rd, rj, rk) EMIT(type_3R(0x67, rk, rj, rd))
+#define RCR_B(rd, rj, rk) EMIT(type_3R(0x68, rk, rj, rd))
+#define RCR_H(rd, rj, rk) EMIT(type_3R(0x69, rk, rj, rd))
+#define RCR_W(rd, rj, rk) EMIT(type_3R(0x6a, rk, rj, rd))
+#define RCR_D(rd, rj, rk) EMIT(type_3R(0x6b, rk, rj, rd))
+
 ////////////////////////////////////////////////////////////////////////////////
 
 
@@ -1738,6 +1764,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
             LD_WU(rd, rj, imm12); \
     } while (0)
 
+#define LDXxw(rd, rj, rk) /* emits LDX_D when rex.w is set, LDX_WU otherwise */ \
+    do {                        \
+        if (rex.w)              \
+            LDX_D(rd, rj, rk);  \
+        else                    \
+            LDX_WU(rd, rj, rk); \
+    } while (0)
+
 #define LDz(rd, rj, imm12)        \
     do {                          \
         if (rex.is32bits)         \