[LA64_DYNAREC] Added more opcodes and fixes (#1423)

* Fixed F2 0F MOVSD opcode
* Added F3 0F 6F MOVDQU opcode
* Added F2,F3 A5 MOVSD opcode
* Added 80 /5 SUB opcode
* Fixed a typo in .clang-format
* Added 66 85 TEST opcode and a fix
author: Yang Liu <numbksco@gmail.com> 2024-04-06 17:38:04 +0800
committer: GitHub <noreply@github.com> 2024-04-06 11:38:04 +0200
commit: 89f0c494e4e1d4eac32d839869e45f1ca1d384d8 (patch)
tree: 9dc4428812ba1b45758dad08911cce89b254727a /src
parent: 24f9263fa4a9b7cf5fa6f07677bf7fa388e0ad54 (diff)
download: box64-89f0c494e4e1d4eac32d839869e45f1ca1d384d8.tar.gz
box64-89f0c494e4e1d4eac32d839869e45f1ca1d384d8.zip
6 files changed, 117 insertions, 7 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 676e8316..ea3dd6fa 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -463,6 +463,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_and8c(dyn, ninst, x1, u8, x2, x4);
                     EBBACK();
                     break;
+                case 5: // SUB
+                    INST_NAME("SUB Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
+                    EBBACK();
+                    break;
                 case 7: // CMP
                     INST_NAME("CMP Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -690,6 +698,37 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LD_BU(x2, x1, 0);
             BSTRINS_D(xRAX, x2, 7, 0);
             break;
+        case 0xA5:
+            if (rep) {
+                INST_NAME("REP MOVSD");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1 << F_DF);
+                BNEZ_MARK2(x1);
+                MARK; // Part with DF==0
+                LDxw(x1, xRSI, 0);
+                SDxw(x1, xRDI, 0);
+                ADDI_D(xRSI, xRSI, rex.w ? 8 : 4);
+                ADDI_D(xRDI, xRDI, rex.w ? 8 : 4);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2; // Part with DF==1
+                LDxw(x1, xRSI, 0);
+                SDxw(x1, xRDI, 0);
+                ADDI_D(xRSI, xRSI, rex.w ? -8 : -4);
+                ADDI_D(xRDI, xRDI, rex.w ? -8 : -4);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("MOVSD");
+                GETDIR(x3, x1, rex.w ? 8 : 4);
+                LDxw(x1, xRSI, 0);
+                SDxw(x1, xRDI, 0);
+                ADD_D(xRSI, xRSI, x3);
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
         case 0xA6:
             switch (rep) {
                 case 1:
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 25023eb4..b4d35233 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -91,6 +91,14 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     DEFAULT;
             }
             break;
+        case 0x85:
+            INST_NAME("TEST Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEW(x1, 0);
+            GETGW(x2);
+            emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
+            break;
         case 0x89:
             INST_NAME("MOV Ew, Gw");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_tests.c b/src/dynarec/la64/dynarec_la64_emit_tests.c
index 1fbf9a7e..2e65ca0c 100644
--- a/src/dynarec/la64/dynarec_la64_emit_tests.c
+++ b/src/dynarec/la64/dynarec_la64_emit_tests.c
@@ -132,7 +132,7 @@ void emit_cmp16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     CLEAR_FLAGS(s3);
     IFX(X_AF | X_CF | X_OF) {
         // for later flag calculation
-        NOR(s5, s5, s1);
+        NOR(s5, xZR, s1);
     }
 
     // It's a cmp, we can't store the result back to s1.
@@ -329,6 +329,47 @@ void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
 }
 
+// emit TEST16 instruction: flags from (s1 & s2), using s3, s4 and s5 as scratch; the AND result goes to s3 and, when flags are pending, to emu->res
+void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX_PENDOR0 {
+        SET_DF(s3, d_tst16);
+    } else {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // la64_lbt set: use X64_AND_H for the flags, then return early
+        IFX (X_ALL) {
+            X64_AND_H(s1, s2);
+        }
+
+        IFX_PENDOR0 {
+            AND(s3, s1, s2); // res = s1 & s2, only needed here for the store below
+            ST_H(s3, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    CLEAR_FLAGS(s3); // path taken when la64_lbt is not set: SF/ZF/PF are set individually below
+    AND(s3, s1, s2); // res = s1 & s2
+
+    IFX_PENDOR0 {
+        ST_H(s3, xEmu, offsetof(x64emu_t, res)); // same store to emu->res as in the la64_lbt path
+    }
+
+    IFX (X_SF) {
+        SRLI_D(s4, s3, 15); // s4 = res >> 15; NOTE(review): assumes s1/s2 carry no bits above 15, so this is the 16-bit sign bit -- confirm callers
+        BEQZ(s4, 8); // NOTE(review): offset 8 presumably skips just the ORI below when s4 == 0 -- confirm
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s3, 8); // NOTE(review): presumably skips the ORI below when res != 0 -- confirm
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s3, s4, s5); // PF is delegated to emit_pf on res, with s4 and s5 as its scratch
+    }
+}
+
 // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index d3d22596..25e523cb 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -68,9 +68,9 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETG;
             v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
             if(MODREG) {
-                ed = (nextop&7)+ (rex.b<<3);
+                ed = (nextop & 7) + (rex.b << 3);
                 d0 = sse_get_reg(dyn, ninst, x1, ed, 0);
-                FMOV_D(d0, v0);
+                VEXTRINS_D(d0, v0, 0); // d0[63:0] = v0[63:0]
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 1, 0);
                 FST_D(v0, ed, fixedaddress);
@@ -85,7 +85,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FADD_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x59:
             INST_NAME("MULSD Gx, Ex");
@@ -95,7 +95,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FMUL_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x5C:
             INST_NAME("SUBSD Gx, Ex");
@@ -105,7 +105,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FSUB_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
@@ -115,7 +115,7 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEXSD(v1, 0);
             d0 = fpu_get_scratch(dyn);
             FDIV_D(d0, v0, v1);
-            VEXTRINS_D(v0, d0, 0); // v0[63:0] = v1[63:0]
+            VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
         default:
             DEFAULT;
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index 9c1da571..f406dead 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -53,6 +53,20 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             FAKEED;
             break;
+        case 0x6F:
+            INST_NAME("MOVDQU Gx, Ex");
+            nextop = F8;
+            if (MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);
+                GETGX_empty(v0);
+                VOR_V(v0, v1, v1);
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 0, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
         case 0x7E:
             INST_NAME("MOVQ Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 99c39ae5..664384a3 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -83,6 +83,12 @@
 // GETGD    get x64 register in gd
 #define GETGD gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3));
 
+// GETGW: extract bits 15..0 (BSTRPICK_D) of the x64 register selected by nextop/rex.r into i, then set gd = i
+#define GETGW(i)                                         \
+    gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+    BSTRPICK_D(i, gd, 15, 0);                            \
+    gd = i;
+
 // GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETED(D)                                                                                \
     if (MODREG) {                                                                               \
@@ -606,6 +612,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_cmp8           STEPNAME(emit_cmp8)
 #define emit_cmp8_0         STEPNAME(emit_cmp8_0)
 #define emit_test8          STEPNAME(emit_test8)
+#define emit_test16         STEPNAME(emit_test16)
 #define emit_test32         STEPNAME(emit_test32)
 #define emit_test32c        STEPNAME(emit_test32c)
 #define emit_add32          STEPNAME(emit_add32)
@@ -672,6 +679,7 @@ void emit_cmp8_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp16_0(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp32_0(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
 void emit_test8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_test16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_test32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_test32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
 void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
author	Yang Liu <numbksco@gmail.com>	2024-04-06 17:38:04 +0800
committer	GitHub <noreply@github.com>	2024-04-06 11:38:04 +0200
commit	89f0c494e4e1d4eac32d839869e45f1ca1d384d8 (patch)
tree	9dc4428812ba1b45758dad08911cce89b254727a /src
parent	24f9263fa4a9b7cf5fa6f07677bf7fa388e0ad54 (diff)
download	box64-89f0c494e4e1d4eac32d839869e45f1ca1d384d8.tar.gz box64-89f0c494e4e1d4eac32d839869e45f1ca1d384d8.zip