about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-04-02 19:44:40 +0800
committer GitHub <noreply@github.com> 2024-04-02 13:44:40 +0200
commit 10cec3de2f04c48b60c0e0f91614244b4515e6ef (patch)
tree c5bd2c034ca376bd04db7be7ac51abc57ea58122 /src
parent 76c0992d02be79bf15ce9975cfbbc824b593d50d (diff)
download box64-10cec3de2f04c48b60c0e0f91614244b4515e6ef.tar.gz
box64-10cec3de2f04c48b60c0e0f91614244b4515e6ef.zip
[LA64_DYNAREC] Added more SSE/SSE2 instructions (#1400)
* [LA64_DYNAREC] Added more SSE/SSE2 instructions

* Thank you test16
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 47
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 56
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.c 18
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 23
-rw-r--r-- src/dynarec/la64/la64_emitter.h 14
5 files changed, 154 insertions, 4 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 5566183d..cbd5f0e1 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -103,12 +103,57 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LOAD_XEMU_REM();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             break;
+        case 0x11:
+            INST_NAME("MOVUPS Ex,Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VOR_V(v1, v0, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VST(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
+        case 0x16:
+            nextop = F8;
+            if (MODREG) {
+                INST_NAME("MOVLHPS Gx,Ex");
+                GETGX(v0, 1);
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);
+            } else {
+                INST_NAME("MOVHPS Gx,Ex");
+                SMREAD();
+                GETGX(v0, 1);
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                FLD_D(v1, ed, fixedaddress);
+            }
+            VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0]
+            break;
         case 0x1F:
             INST_NAME("NOP (multibyte)");
             nextop = F8;
             FAKEED;
             break;
-
+        case 0x29:
+            INST_NAME("MOVAPS Ex,Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VOR_V(v1, v0, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VST(v0, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         #define GO(GETFLAGS, NO, YES, F, I)                                                          \
             READFLAGS(F);                                                                            \
             if (la64_lbt) {                                                                          \
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index effaf5bf..8a8a06fa 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -55,21 +55,71 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             FAKEED;
             break;
+        case 0x6C:
+            INST_NAME("PUNPCKLQDQ Gx,Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            if (MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);
+            } else {
+                v1 = fpu_get_scratch(dyn);
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                FLD_D(v1, ed, fixedaddress);
+            }
+            VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0]
+            break;
         case 0x6E:
             INST_NAME("MOVD Gx, Ed");
             nextop = F8;
             GETGX_empty(v0);
             v1 = fpu_get_scratch(dyn);
-            GETED(0);
+            if (MODREG) {
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                if (rex.w) {
+                    MOVGR2FR_D(v1, ed);
+                } else {
+                    MOVGR2FR_W(v1, ed);
+                }
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                FLDxw(v1, ed, fixedaddress);
+            }
             VXOR_V(v0, v0, v0);
             if (rex.w) {
-                MOVGR2FR_D(v1, ed);
                 VEXTRINS_D(v0, v1, 0); // v0[63:0] = v1[63:0]
             } else {
-                MOVGR2FR_W(v1, ed);
                 VEXTRINS_W(v0, v1, 0); // v0[31:0] = v1[31:0]
             }
             break;
+        case 0x6F:
+            INST_NAME("MOVDQA Gx,Ex");
+            nextop = F8;
+            if (MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0);
+                GETGX_empty(v0);
+                VOR_V(v0, v1, v1);
+            } else {
+                GETGX_empty(v0);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
+        case 0xEF:
+            INST_NAME("PXOR Gx,Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) {
+                // special case for PXOR Gx, Gx
+                q0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VXOR_V(q0, q0, q0);
+            } else {
+                q0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                GETEX(q1, 0, 0);
+                VXOR_V(q0, q0, q1);
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 9ad715e6..7d8b9961 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -511,6 +511,24 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
     // TODO
 }
 
+// SSE / SSE2 helpers
+// Get the LSX register for an SSE reg, creating the cache entry if needed (ninst and s1 are not used in this body)
+int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite)
+{
+    if (dyn->lsx.ssecache[a].v != -1) { // NOTE(review): -1 presumably marks an entry with no register assigned - confirm
+        if (forwrite) {
+            dyn->lsx.ssecache[a].write = 1; // update only if forwrite
+            dyn->lsx.lsxcache[dyn->lsx.ssecache[a].reg].t = LSX_CACHE_XMMW; // retag the mapped register as written
+        }
+        return dyn->lsx.ssecache[a].reg; // existing entry: reuse its register, no load is emitted
+    }
+    dyn->lsx.ssecache[a].reg = fpu_get_reg_xmm(dyn, forwrite ? LSX_CACHE_XMMW : LSX_CACHE_XMMR, a); // new entry, tagged XMMW or XMMR from forwrite
+    int ret = dyn->lsx.ssecache[a].reg;
+    dyn->lsx.ssecache[a].write = forwrite;
+    VLD(ret, xEmu, offsetof(x64emu_t, xmm[a])); // load from the xmm[a] field of x64emu_t via xEmu (done even when forwrite is set)
+    return ret;
+}
+
 // get lsx register for an SSE reg, but don't try to synch it if it needed to be created
 int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, int a)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 74470b68..4d8a8d8c 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -174,13 +174,33 @@
     gd = i;                                                   \
     BSTRPICK_D(gd, gb1, gb2 + 7, gb2);
 
+// Get GX as a quad (might use x1): gd = bits 5:3 of nextop plus rex.r as bit 3, a = register returned by sse_get_reg for gd
+#define GETGX(a, w)                             \
+    gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \
+    a = sse_get_reg(dyn, ninst, x1, gd, w) // w is passed as forwrite; expands to two statements (no do/while wrapper)
+
+
 #define GETGX_empty(a)                          \
     gd = ((nextop & 0x38) >> 3) + (rex.r << 3); \
     a = sse_get_reg_empty(dyn, ninst, x1, gd)
 
+// Get EX as a quad, (x1 is used; x3 and x2 go to geted): a = sse_get_reg of the low 3 bits of nextop + rex.b if MODREG, else a scratch reg loaded with VLD
+#define GETEX(a, w, D)                                                                       \
+    if (MODREG) {                                                                            \
+        a = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), w);                     \
+    } else {                                                                                 \
+        SMREAD();                                                                            \
+        addr = geted(dyn, addr, ninst, nextop, &ed, x3, x2, &fixedaddress, rex, NULL, 1, D); \
+        a = fpu_get_scratch(dyn);                                                            \
+        VLD(a, ed, fixedaddress);                                                            \
+    } // w is passed as forwrite, D as geted's last argument; expands to a bare if/else (no do/while wrapper)
+
 // Write gb (gd) back to original register / memory, using s1 as scratch
 #define GBBACK() BSTRINS_D(gb1, gd, gb2 + 7, gb2);
 
+// Generic get GD, but reg value in gd (R_RAX is not added): gd = bits 5:3 of nextop plus rex.r as bit 3
+#define GETG gd = ((nextop & 0x38) >> 3) + (rex.r << 3)
+
 // Write eb (ed) back to original register / memory, using s1 as scratch
 #define EBBACK()                            \
     if (wb1) {                              \
@@ -527,6 +547,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define x87_forget       STEPNAME(x87_forget)
 #define sse_purge07cache STEPNAME(sse_purge07cache)
+#define sse_get_reg       STEPNAME(sse_get_reg)
 #define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
 
 #define fpu_pushcache       STEPNAME(fpu_pushcache)
@@ -600,6 +621,8 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st);
 // SSE/SSE2 helpers
 // purge the XMM0..XMM7 cache (before function call)
 void sse_purge07cache(dynarec_la64_t* dyn, int ninst, int s1);
+// get lsx register for an SSE reg, create the entry if needed
+int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite);
 // get lsx register for an SSE reg, but don't try to synch it if it needed to be created
 int sse_get_reg_empty(dynarec_la64_t* dyn, int ninst, int s1, int a);
 
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 117bb99e..9e584062 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -524,6 +524,11 @@ f24-f31  fs0-fs7   Static registers                Callee
 // MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD)
 #define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd))
 
+#define FLD_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101110, imm12, rj, fd)) // fld.d: load a 64-bit value from rj + imm12 into FP register fd
+#define FLD_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101100, imm12, rj, fd)) // fld.s: load a 32-bit value from rj + imm12 into FP register fd
+#define FST_D(fd, rj, imm12) EMIT(type_2RI12(0b0010101111, imm12, rj, fd)) // fst.d: store the low 64 bits of FP register fd to rj + imm12
+#define FST_S(fd, rj, imm12) EMIT(type_2RI12(0b0010101101, imm12, rj, fd)) // fst.s: store the low 32 bits of FP register fd to rj + imm12
+
 #define FADD_S(fd, fj, fk)       EMIT(type_3R(0b00000001000000001, fk, fj, fd))
 #define FADD_D(fd, fj, fk)       EMIT(type_3R(0b00000001000000010, fk, fj, fd))
 #define FSUB_S(fd, fj, fk)       EMIT(type_3R(0b00000001000000101, fk, fj, fd))
@@ -1671,6 +1676,15 @@ LSX instruction starts with V, LASX instruction starts with XV.
             LD_D(rd, rj, imm12);  \
     } while (0)
 
+#define FLDxw(rd, rj, imm12)      \
+    do {                          \
+        if (rex.w)                \
+            FLD_D(rd, rj, imm12); \
+        else                      \
+            FLD_S(rd, rj, imm12); \
+    } while (0) // FP load sized by rex.w: FLD_D when rex.w is set, FLD_S otherwise
+
+
 #define SDxw(rd, rj, imm12)      \
     do {                         \
         if (rex.w)               \