about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-12-26 04:00:20 +0800
committer GitHub <noreply@github.com> 2024-12-25 21:00:20 +0100
commit 145689783f91eae5248ade24edde61362a0a6ba0 (patch)
tree d7c3737fa336c19063ba806c5e6796ebc6436485
parent 21a21b04e6b678cbdd88172c0d162e82f2190c9f (diff)
download box64-145689783f91eae5248ade24edde61362a0a6ba0.tar.gz
box64-145689783f91eae5248ade24edde61362a0a6ba0.zip
[LA64_DYNAREC] Added more opcodes and fixed CVTTPD2DQ (#2211)
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 20
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 32
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 52
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_shift.c 70
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 2
-rw-r--r-- src/dynarec/la64/la64_emitter.h 4
6 files changed, 175 insertions, 5 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index a5a8f459..18d8183d 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -1920,6 +1920,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xD2: // TODO: Jump if CL is 0
             nextop = F8;
             switch ((nextop >> 3) & 7) {
+                case 4:
+                case 6:
+                    if (opcode == 0xD0) {
+                        INST_NAME("SHL Eb, 1");
+                        GETEB(x1, 0);
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("SHL Eb, CL");
+                        GETEB(x1, 0);
+                        ANDI(x2, xRCX, 0x1F);
+                        BEQ_NEXT(x2, xZR);
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
+                    if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
+                    emit_shl8(dyn, ninst, x1, x2, x5, x4, x6);
+                    EBBACK();
+                    break;
                 case 5:
                     if (opcode == 0xD0) {
                         INST_NAME("SHR Eb, 1");
@@ -1932,8 +1949,7 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         BEQ_NEXT(x2, xZR);
                     }
                     SETFLAGS(X_ALL, SF_SET_PENDING, NAT_FLAGS_FUSION); // some flags are left undefined
-                    if (box64_dynarec_safeflags > 1)
-                        MAYSETFLAGS();
+                    if (box64_dynarec_safeflags > 1) MAYSETFLAGS();
                     emit_shr8(dyn, ninst, x1, x2, x5, x4, x6);
                     EBBACK();
                     break;
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index f0da22d8..4e80972f 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -968,6 +968,38 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ZEROUP(gd);
             }
             break;
+        case 0xB3:
+            INST_NAME("BTR Ed, Gd");
+            SETFLAGS(X_CF, SF_SUBSET, NAT_FLAGS_NOFUSION);
+            SET_DFNONE();
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                ed = TO_NAT((nextop & 7) + (rex.b << 3));
+                wback = 0;
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                SRAIxw(x1, gd, 5 + rex.w);
+                ADDSL(x3, wback, x1, 2 + rex.w, x1);
+                LDxw(x1, x3, fixedaddress);
+                ed = x1;
+                wback = x3;
+            }
+            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
+            SRL_D(x4, ed, x2);
+            BSTRINS_D(xFlags, x4, 0, 0);
+            ADDI_D(x4, xZR, 1);
+            ANDI(x2, gd, rex.w ? 0x3f : 0x1f);
+            SLL_D(x4, x4, x2);
+            ANDN(ed, ed, x4);
+            if (wback) {
+                SDxw(ed, wback, fixedaddress);
+                SMWRITE();
+            } else if (!rex.w) {
+                ZEROUP(ed);
+            }
+            break;
         case 0xB6:
             INST_NAME("MOVZX Gd, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 6d760bb9..d2024ba3 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -1634,6 +1634,7 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX_empty(v0);
             // TODO: fastround
             VFTINTRZ_W_D(v0, v1, v1);
+            VINSGR2VR_D(v0, xZR, 1);
             break;
         case 0xE7:
             INST_NAME("MOVNTDQ Ex, Gx");
@@ -1710,6 +1711,57 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VXOR_V(q0, q0, q1);
             }
             break;
+        case 0xF1:
+            INST_NAME("PSLLW Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_H(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 16);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_H(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
+        case 0xF2:
+            INST_NAME("PSLLD Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_W(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 32);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_W(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
+        case 0xF3:
+            INST_NAME("PSLLQ Gx, Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            v0 = fpu_get_scratch(dyn);
+            v1 = fpu_get_scratch(dyn);
+            VREPLVE_D(v1, q1, xZR);
+            VPICKVE2GR_DU(x4, q1, 0);
+            SLTUI(x3, x4, 64);
+            SUB_D(x3, xZR, x3);
+            NOR(x3, x3, xZR);
+            VREPLGR2VR_D(v0, x3);
+            VSLL_D(q0, q0, v1);
+            VAND_V(v0, q0, v0);
+            VXOR_V(q0, q0, v0);
+            break;
         case 0xF4:
             INST_NAME("PMULUDQ Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index d1fd7290..9c7845dd 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -336,13 +336,77 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
-// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
-void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+// emit SHL8 instruction, from s1, shift s2 (expected !0 and and'd already by the caller), store result in s1 using s3, s4 and s5 as scratch
+void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
-    int64_t j64;
+    if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR); // only when native-flags fusion is set for this instruction
+
+    IFX (X_PEND) { // X_PEND: save both byte operands into x64emu_t op1/op2 and tag the operation as d_shl8
+        ST_B(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_B(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl8); // NOTE(review): s4 is presumably the scratch used to write the tag -- confirm in SET_DF
+    } else IFXORNAT (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // LBT path: emit X64_SLL_B (when any flag is requested), do the shift, then return early
+        IFX (X_ALL) {
+            X64_SLL_B(s1, s2); // NOTE(review): presumably computes the x86 flags for the byte shift -- confirm in la64_emitter.h
+        }
+        SLL_D(s1, s1, s2);
+        ANDI(s1, s1, 0xff); // keep only the low 8 bits of the shifted value
+
+        IFX (X_PEND) {
+            ST_B(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
 
+    SLL_D(s1, s1, s2); // result is not truncated yet: bit 8 is read below to derive CF
 
+    // s2 is not 0 here; the SHL Eb callers pass 1 or CL & 0x1f, so the range is 1..0x1f
+    CLEAR_FLAGS(s3);
+    IFX (X_CF | X_OF) {
+        SRLI_D(s5, s1, 8); // bring bit 8 of the untruncated result down to bit 0
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX (X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI_D(s1, s1, 56); // move bit 7 of the byte up to bit 63 so the compare against xZR below tests it
+    IFX (X_SF) {
+        BGE(s1, xZR, 8); // skip the ORI when s1 >= 0, i.e. bit 7 of the byte result is clear
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_D(s1, s1, 56); // shift back down: s1 is now the 8-bit result with everything above bit 7 cleared
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // skip the ORI unless the 8-bit result is zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI_D(s3, s2, -1); // s3 = count - 1, zero only when the count is exactly 1
+        BNEZ(s3, 4 + 4 * 4); // count != 1: jump over the four instructions that set OF
+        SRLI_D(s3, s1, 7); // s3 = bit 7 of the 8-bit result
+        XOR(s3, s3, s5); // XOR with the bit kept in s5 (bit 8 of the untruncated result, the CF value)
+        SLLI_D(s3, s3, F_OF);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
+// emit SHR8 instruction, from s1 , shift s2 (!0 and and'd already), store result in s1 using s3 and s4 as scratch
+void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
     if (dyn->insts[ninst].nat_flags_fusion) NAT_FLAGS_OPS(s1, xZR);
+
     IFX (X_PEND) {
         ST_B(s2, xEmu, offsetof(x64emu_t, op2));
         ST_B(s1, xEmu, offsetof(x64emu_t, op1));
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 7eaaac30..38ae926a 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -897,6 +897,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shl16c         STEPNAME(emit_shl16c)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
+#define emit_shl8           STEPNAME(emit_shl8)
 #define emit_shr8           STEPNAME(emit_shr8)
 #define emit_shr16          STEPNAME(emit_shr16)
 #define emit_shr16c         STEPNAME(emit_shr16c)
@@ -1006,6 +1007,7 @@ void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_shl16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shl8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shr16c(dynarec_la64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 1cccad6a..cddcb1c1 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1898,6 +1898,10 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VPICKVE2GR_DU(rd, vj, imm1)  EMIT(type_2RI1(0b011100101111001111110, imm1, vj, rd))
 #define VFRINT_D(vd, vj)             EMIT(type_2R(0b0111001010011101001110, vj, vd))
 #define VFRINTRRD_D(vd, vj, imm4)    EMIT(type_2RI4(0b011100101001110101, imm4, vj, vd))
+#define VREPLGR2VR_B(vd, rj)         EMIT(type_2R(0b0111001010011111000000, rj, vd)) // presumably copies GPR rj into every 8-bit element of vd (per mnemonic) -- confirm in the ISA manual
+#define VREPLGR2VR_H(vd, rj)         EMIT(type_2R(0b0111001010011111000001, rj, vd)) // same, 16-bit elements (low two opcode bits select the element size)
+#define VREPLGR2VR_W(vd, rj)         EMIT(type_2R(0b0111001010011111000010, rj, vd)) // same, 32-bit elements
+#define VREPLGR2VR_D(vd, rj)         EMIT(type_2R(0b0111001010011111000011, rj, vd)) // same, 64-bit elements
 
 ////////////////////////////////////////////////////////////////////////////////
 // (undocumented) LBT extension instructions