about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-05-03 18:41:49 +0800
committer GitHub <noreply@github.com> 2024-05-03 12:41:49 +0200
commit 31dabf51aebe9825ab0c3110ebabd3e3125e927e (patch)
tree 523cdb915044e410bbc376e0571f3f99c3ad7bc9 /src
parent 22fd100b04409b3b9aebbdd6707e73802b22be50 (diff)
download box64-31dabf51aebe9825ab0c3110ebabd3e3125e927e.tar.gz
box64-31dabf51aebe9825ab0c3110ebabd3e3125e927e.zip
[LA64_DYNAREC] Added more opcodes (#1490)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 72
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 18
-rw-r--r-- src/dynarec/la64/dynarec_la64_66.c 101
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 40
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_shift.c 63
-rw-r--r-- src/dynarec/la64/dynarec_la64_f20f.c 8
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 10
7 files changed, 296 insertions, 16 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 02bbbee2..6e5ec591 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -300,10 +300,19 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGD;
             GETED(0);
             emit_xor32(dyn, ninst, rex, ed, gd, x3, x4);
-            if(ed!=gd) {
+            if (ed != gd) {
                 WBACK;
             }
             break;
+        case 0x32:
+            INST_NAME("XOR Gb, Eb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_xor8(dyn, ninst, x2, x1, x4, x5);
+            GBBACK();
+            break;
         case 0x33:
             INST_NAME("XOR Gd, Ed");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -312,6 +321,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETED(0);
             emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
+        case 0x35:
+            INST_NAME("XOR EAX, Id");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i64 = F32S;
+            emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4);
+            break;
         case 0x38:
             INST_NAME("CMP Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -320,12 +335,6 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGB(x2);
             emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5, x6);
             break;
-        case 0x35:
-            INST_NAME("XOR EAX, Id");
-            SETFLAGS(X_ALL, SF_SET_PENDING);
-            i64 = F32S;
-            emit_xor32c(dyn, ninst, rex, xRAX, i64, x3, x4);
-            break;
         case 0x39:
             INST_NAME("CMP Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -1048,6 +1057,31 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MOV64xw(x2, i64);
             emit_test32(dyn, ninst, rex, xRAX, x2, x3, x4, x5);
             break;
+        case 0xAA:
+            if (rep) {
+                INST_NAME("REP STOSB");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1 << F_DF);
+                BNEZ_MARK2(x1);
+                MARK; // Part with DF==0
+                ST_B(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, 1);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2; // Part with DF==1
+                ST_B(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, -1);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("STOSB");
+                GETDIR(x3, x1, 1);
+                ST_B(xRAX, xRDI, 0);
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
         case 0xAB:
             if (rep) {
                 INST_NAME("REP STOSD");
@@ -1450,6 +1484,30 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
+        case 0xD0:
+        case 0xD2: // TODO: Jump if CL is 0
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 5:
+                    if (opcode == 0xD0) {
+                        INST_NAME("SHR Eb, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("SHR Eb, CL");
+                        ANDI(x2, xRCX, 0x1F);
+                        BEQ_NEXT(x2, xZR);
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                    if (box64_dynarec_safeflags > 1)
+                        MAYSETFLAGS();
+                    GETEB(x1, 0);
+                    emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK();
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         case 0xD1:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 5cc8ee71..6fb729f4 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -158,6 +158,24 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0]
             break;
+        case 0x18:
+            nextop = F8;
+            if ((nextop & 0xC0) == 0xC0) {
+                INST_NAME("NOP (multibyte)");
+            } else
+                switch ((nextop >> 3) & 7) {
+                    case 0:
+                    case 1:
+                    case 2:
+                    case 3:
+                        INST_NAME("PREFETCHh Ed");
+                        FAKEED;
+                        break;
+                    default:
+                        INST_NAME("NOP (multibyte)");
+                        FAKEED;
+                }
+            break;
         case 0x1F:
             INST_NAME("NOP (multibyte)");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 6c4e54f9..a9a5dc68 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -67,6 +67,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_add16(dyn, ninst, x1, x2, x3, x4, x6);
             GWBACK;
             break;
+        case 0x09:
+            INST_NAME("OR Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_or16(dyn, ninst, x1, x2, x4, x2);
+            EWBACK;
+            break;
         case 0x0F:
             switch (rep) {
                 case 0: addr = dynarec64_660F(dyn, addr, ip, ninst, rex, ok, need_epilog); break;
@@ -145,6 +154,18 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEW(x2, 0);
             emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
             break;
+        case 0x3D:
+            INST_NAME("CMP AX, Iw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i32 = F16;
+            BSTRPICK_D(x1, xRAX, 15, 0);
+            if (i32) {
+                MOV32w(x2, i32);
+                emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5, x6);
+            } else {
+                emit_cmp16_0(dyn, ninst, x1, x3, x4);
+            }
+            break;
         case 0x81:
         case 0x83:
             nextop = F8;
@@ -181,6 +202,22 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_or16(dyn, ninst, x1, x5, x2, x4);
                     EWBACK;
                     break;
+                case 4: // AND
+                    if (opcode == 0x81) {
+                        INST_NAME("AND Ew, Iw");
+                    } else {
+                        INST_NAME("AND Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, (opcode == 0x81) ? 2 : 1);
+                    if (opcode == 0x81)
+                        i16 = F16S;
+                    else
+                        i16 = F8S;
+                    MOV64x(x5, i16);
+                    emit_and16(dyn, ninst, x1, x5, x2, x4);
+                    EWBACK;
+                    break;
                 case 5: // SUB
                     if (opcode == 0x81) {
                         INST_NAME("SUB Ew, Iw");
@@ -260,6 +297,31 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 BSTRINS_D(gd, x2, 15, 0);
             }
             break;
+        case 0xAB:
+            if (rep) {
+                INST_NAME("REP STOSW");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1 << F_DF);
+                BNEZ_MARK2(x1);
+                MARK; // Part with DF==0
+                ST_H(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, 2);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2; // Part with DF==1
+                ST_H(xRAX, xRDI, 0);
+                ADDI_D(xRDI, xRDI, -2);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("STOSW");
+                GETDIR(x3, x1, 2);
+                ST_H(xRAX, xRDI, 0);
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
         case 0xB8:
         case 0xB9:
         case 0xBA:
@@ -277,6 +339,45 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xC1:
             nextop = F8;
             switch ((nextop >> 3) & 7) {
+                case 0:
+                    INST_NAME("ROL Ew, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF | X_CF, SF_SET_DF);
+                    GETEW(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(rol16, x1, x3);
+                    EWBACK;
+                    break;
+                case 1:
+                    INST_NAME("ROR Ew, Ib");
+                    MESSAGE(LOG_DUMP, "Need Optimization\n");
+                    SETFLAGS(X_OF | X_CF, SF_SET_DF);
+                    GETEW(x1, 1);
+                    u8 = F8;
+                    MOV32w(x2, u8);
+                    CALL_(ror16, x1, x3);
+                    EWBACK;
+                    break;
+                case 4:
+                case 6:
+                    INST_NAME("SHL Ew, Ib");
+                    UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    GETEW(x1, 1);
+                    u8 = F8;
+                    UFLAG_IF { MOV32w(x2, (u8 & 15)); }
+                    UFLAG_OP12(ed, x2)
+                    if (MODREG) {
+                        SLLI_D(ed, ed, 48 + (u8 & 15));
+                        SRLI_D(ed, ed, 48);
+                    } else {
+                        SLLI_D(ed, ed, u8 & 15);
+                    }
+                    EWBACK;
+                    UFLAG_RES(ed);
+                    UFLAG_DF(x3, d_shl16);
+                    break;
                 case 5:
                     INST_NAME("SHR Ew, Ib");
                     UFLAG_IF { MESSAGE(LOG_DUMP, "Need Optimization for flags\n"); }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 3bac9e1f..3064397f 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -86,6 +86,46 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0x2E:
+            // no special check...
+        case 0x2F:
+            if (opcode == 0x2F) {
+                INST_NAME("COMISD Gx, Ex");
+            } else {
+                INST_NAME("UCOMISD Gx, Ex");
+            }
+            SETFLAGS(X_ALL, SF_SET);
+            SET_DFNONE();
+            nextop = F8;
+            GETGX(d0, 0);
+            GETEXSD(v0, 0, 0);
+
+            CLEAR_FLAGS(x3);
+            // if isnan(d0) || isnan(v0)
+            IFX (X_ZF | X_PF | X_CF) {
+                FCMP_D(fcc0, d0, v0, cUN);
+                BCEQZ_MARK(fcc0);
+                ORI(xFlags, xFlags, (1 << F_ZF) | (1 << F_PF) | (1 << F_CF));
+                B_MARK3_nocond;
+            }
+            MARK;
+            // else if isless(d0, v0)
+            IFX (X_CF) {
+                FCMP_D(fcc1, d0, v0, cLT);
+                BCEQZ_MARK2(fcc1);
+                ORI(xFlags, xFlags, 1 << F_CF);
+                B_MARK3_nocond;
+            }
+            MARK2;
+            // else if d0 == v0
+            IFX (X_ZF) {
+                FCMP_D(fcc2, d0, v0, cEQ);
+                BCEQZ_MARK3(fcc2);
+                ORI(xFlags, xFlags, 1 << F_ZF);
+            }
+            MARK3;
+            SPILL_EFLAGS();
+            break;
         case 0x38: // SSSE3 opcodes
             nextop = F8;
             switch (nextop) {
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index aa528b34..6bea6301 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -8,6 +8,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -169,6 +170,66 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
+// emit SHR8 instruction: shift the 8-bit value in s1 right by s2 (s2 is non-zero and already masked by the caller), store the result in s1; s3 and s4 are scratch, s5 is not referenced directly in this body
+void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    int64_t j64; // not referenced directly below; presumably kept for branch helper macros -- TODO confirm


+    IFX (X_PEND) { // pending flags requested: record the operands and the operation kind in the emu state
+        ST_B(s2, xEmu, offsetof(x64emu_t, op2)); // op2 <- shift count
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1)); // op1 <- value before the shift
+        SET_DF(s4, d_shr8); // s4 is used as scratch by SET_DF
+    } else IFX (X_ALL) { // no pending request, but at least one flag is wanted
+        SET_DFNONE();
+    }

+    if (la64_lbt) { // path used when la64_lbt is set: no manual flag computation below
+        IFX (X_ALL) {
+            X64_SRL_B(s1, s2); // presumably the LBT instruction that produces the x86 flags for a byte SRL -- confirm in la64_emitter.h
+        }
+        SRL_D(s1, s1, s2); // the actual shift
+        ANDI(s1, s1, 0xff); // keep only the low 8 bits of the result

+        IFX (X_PEND) {
+            ST_B(s1, xEmu, offsetof(x64emu_t, res)); // res <- result byte
+        }
+        return;
+    }

+    CLEAR_FLAGS(s3); // s3 is passed as scratch
+    IFX (X_CF) {
+        ADDI_D(s3, s2, -1); // s3 = count - 1
+        SRA_D(s3, s1, s3); // s3 = s1 >> (count - 1): bit 0 is now the last bit that will be shifted out
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI_D(s3, xZR, 1); // s3 = 1, the count to compare against
+        BNE(s2, s3, 4 + 3 * 4); // count != 1: jump over the three instructions below
+        SRLI_D(s3, s1, 7); // bit 7 of the operand before the shift
+        SLLI_D(s3, s3, F_OF); // move it to the OF bit position
+        OR(xFlags, xFlags, s3);
+    }

+    SRL_D(s1, s1, s2); // the actual shift
+    ANDI(s1, s1, 0xff); // keep only the low 8 bits of the result

+    // SF should be unset
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res)); // res <- result byte
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // result non-zero: skip the ORI that sets ZF
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4); // parity of the result, s3 and s4 as scratch
+    }
+}
+
 
 // emit SHR32 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
 void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
@@ -210,7 +271,7 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         // OF flag is affected only on 1-bit shifts
         // OF flag is set to the most-significant bit of the original operand
         ADDI_D(s3, xZR, 1);
-        BEQ(s2, s3, 4 + 4 * 4);
+        BNE(s2, s3, 4 + 4 * 4);
         SRLIxw(s3, s1, rex.w ? 63 : 31);
         SLLI_D(s3, s3, F_OF);
         OR(xFlags, xFlags, s3);
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index 6167883d..903d7e58 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -97,7 +97,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("ADDSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEXSD(v1, 0);
+            GETEXSD(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FADD_D(d0, v0, v1);
             if (!box64_dynarec_fastnan) {
@@ -114,7 +114,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MULSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEXSD(v1, 0);
+            GETEXSD(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FMUL_D(d0, v0, v1);
             if (!box64_dynarec_fastnan) {
@@ -131,7 +131,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEXSD(v1, 0);
+            GETEXSD(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FSUB_D(d0, v0, v1);
             if (!box64_dynarec_fastnan) {
@@ -148,7 +148,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
             GETGX(v0, 1);
-            GETEXSD(v1, 0);
+            GETEXSD(v1, 0, 0);
             d0 = fpu_get_scratch(dyn);
             FDIV_D(d0, v0, v1);
             if (!box64_dynarec_fastnan) {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 5fa9d127..d7bf5efd 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -290,14 +290,14 @@
     }
 
 // Get Ex as a double, not a quad (warning, x1 get used, x2 might too)
-#define GETEXSD(a, D)                                                                        \
+#define GETEXSD(a, w, D)                                                                     \
     if (MODREG) {                                                                            \
-        a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);                     \
+        a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w);                     \
     } else {                                                                                 \
-        SMREAD();                                                                            \
+        SMREAD(); /* TODO */                                                                 \
         a = fpu_get_scratch(dyn);                                                            \
         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, D); \
-        FLD_D(a, ed, fixedaddress);                                                            \
+        FLD_D(a, ed, fixedaddress);                                                          \
     }
 
 // Get Ex as a single, not a quad (warning, x1 get used)
@@ -762,6 +762,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_and32c         STEPNAME(emit_and32c)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
+#define emit_shr8           STEPNAME(emit_shr8)
 #define emit_shr32          STEPNAME(emit_shr32)
 #define emit_shr32c         STEPNAME(emit_shr32c)
 #define emit_sar32c         STEPNAME(emit_sar32c)
@@ -845,6 +846,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);