about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-04-07 14:08:14 +0800
committer GitHub <noreply@github.com> 2023-04-07 08:08:14 +0200
commit 1159c0530bba39bd293c146b313bfd5968c9efa0 (patch)
tree f2d33266e5ab6102d522433f929b21383c4bdd18 /src
parent 053ecec70bda076cfd4910a850bfbd8971fd7501 (diff)
download box64-1159c0530bba39bd293c146b313bfd5968c9efa0.tar.gz
box64-1159c0530bba39bd293c146b313bfd5968c9efa0.zip
[RV64_DYNAREC] Added more opcodes for Stardew Valley (#671)
* [RV64_DYNAREC] Fixed PADD opcode

* [RV64_DYNAREC] Added 66 0F 73 /7 PSLLDQ opcode

* [RV64_DYNAREC] Added F2 0F 58 ADDSD opcode

* [RV64_DYNAREC] Added F2 0F 59 MULSD opcode

* [RV64_DYNAREC] Added F3 0F 6F,7F MOVDQU opcode

* [RV64_DYNAREC] Fixed emit_shl32* flag calculation

* [RV64_DYNAREC] Fixed 8D LEA opcode

* [RV64_DYNAREC] Fixed 66 0F FD PADDW opcode

* [RV64_DYNAREC] Fixed PSHUFD opcode
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/rv64/dynarec_rv64_00.c2
-rw-r--r--src/dynarec/rv64/dynarec_rv64_660f.c101
-rw-r--r--src/dynarec/rv64/dynarec_rv64_emit_shift.c18
-rw-r--r--src/dynarec/rv64/dynarec_rv64_f20f.c17
-rw-r--r--src/dynarec/rv64/dynarec_rv64_f30f.c15
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.h12
6 files changed, 113 insertions, 52 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 03843610..0dae504d 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -870,7 +870,7 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 if(gd!=ed) {    // it's sometimes used as a 3 bytes NOP
                     MV(gd, ed);
                 }
-                else if(!rex.w) {
+                if(!rex.w) {
                     ZEROUP(gd);   //truncate the higher 32bits as asked
                 }
             }
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index 05b13987..a5be39bf 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -198,10 +198,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETGX(x1);
             GETEX(x2, 0);
-            LD(x3, wback, fixedaddress+0);
-            LD(x4, wback, fixedaddress+8);
-            SD(x3, gback, 0);
-            SD(x4, gback, 8);
+            SSE_LOOP_MV_Q(x3);
             break;
         case 0x70: // TODO: Optimize this!
             INST_NAME("PSHUFD Gx,Ex,Ib");
@@ -210,14 +207,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(x2, 1);
             u8 = F8;
             i32 = -1;
-            for (int i=0; i<4; ++i) {
-                int32_t idx = (u8>>(i*2))&3;
-                if (idx!=i32) {
-                    LWU(x4, wback, fixedaddress+idx*4);
-                    i32 = idx;
-                }
-                SW(x4, gback, i*4);
-            }
+            int32_t idx;
+
+            idx = (u8>>(0*2))&3;
+            LWU(x3, wback, fixedaddress+idx*4);
+            idx = (u8>>(1*2))&3;
+            LWU(x4, wback, fixedaddress+idx*4);
+            idx = (u8>>(2*2))&3;
+            LWU(x5, wback, fixedaddress+idx*4);
+            idx = (u8>>(3*2))&3;
+            LWU(x6, wback, fixedaddress+idx*4);
+
+            SW(x3, gback, 0*4);
+            SW(x4, gback, 1*4);
+            SW(x5, gback, 2*4);
+            SW(x6, gback, 3*4);
             break;
         case 0x72:
             nextop = F8;
@@ -247,31 +251,58 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("PSRLDQ Ex, Ib");
                     GETEX(x1, 1);
                     u8 = F8;
-                    if(u8) {
-                        if(u8>15) {
-                            // just zero dest
-                            SD(xZR, x1, fixedaddress+0);
-                            SD(xZR, x1, fixedaddress+8);
+                    if(!u8) break;
+                    if(u8>15) {
+                        // just zero dest
+                        SD(xZR, x1, fixedaddress+0);
+                        SD(xZR, x1, fixedaddress+8);
+                    } else {
+                        u8*=8;
+                        if (u8 < 64) {
+                            LD(x3, x1, fixedaddress+0);
+                            LD(x4, x1, fixedaddress+8);
+                            SRLI(x3, x3, u8);
+                            SLLI(x5, x4, 64-u8);
+                            OR(x3, x3, x5);
+                            SD(x3, x1, fixedaddress+0);
+                            SRLI(x4, x4, u8);
+                            SD(x4, x1, fixedaddress+8);
                         } else {
-                            u8*=8;
-                            if (u8 < 64) {
-                                LD(x3, x1, fixedaddress+0);
-                                LD(x4, x1, fixedaddress+8);
-                                SRLI(x3, x3, u8);
-                                SLLI(x5, x4, 64-u8);
-                                OR(x3, x3, x5);
-                                SD(x3, x1, fixedaddress+0);
-                                SRLI(x4, x4, u8);
-                                SD(x4, x1, fixedaddress+8);
-                            } else {
-                                LD(x3, x1, fixedaddress+8);
-                                if (u8-64 > 0) { SRLI(x3, x3, u8-64); }
-                                SD(x3, x1, fixedaddress+0);
-                                SD(xZR, x1, fixedaddress+8);
-                            }
+                            LD(x3, x1, fixedaddress+8);
+                            if (u8-64 > 0) { SRLI(x3, x3, u8-64); }
+                            SD(x3, x1, fixedaddress+0);
+                            SD(xZR, x1, fixedaddress+8);
                         }
                     }
                     break;
+                case 7:
+                    INST_NAME("PSLLDQ Ex, Ib");
+                    GETEX(x1, 1);
+                    u8 = F8;
+                    if(!u8) break;
+                    if(u8>15) {
+                        // just zero dest
+                        SD(xZR, x1, fixedaddress+0);
+                        SD(xZR, x1, fixedaddress+8);
+                    } else {
+                        u8*=8;
+                        if (u8 < 64) {
+                            LD(x3, x1, fixedaddress+0);
+                            LD(x4, x1, fixedaddress+8);
+                            SLLI(x4, x4, u8);
+                            SRLI(x5, x3, 64-u8);
+                            OR(x4, x4, x5);
+                            SD(x4, x1, fixedaddress+8);
+                            SLLI(x3, x3, u8);
+                            SD(x3, x1, fixedaddress+0);
+                        } else {
+                            LD(x3, x1, fixedaddress+0);
+                            if (u8-64 > 0) { SLLI(x3, x3, u8-64); }
+                            SD(x3, x1, fixedaddress+8);
+                            SD(xZR, x1, fixedaddress+0);
+                        }
+                    }
+                    break;  // without this, PSLLDQ falls through into the default: DEFAULT; case below
                 default:
                     DEFAULT;
             }
@@ -395,17 +425,16 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0xFD:
             INST_NAME("PADDW Gx,Ex");
             nextop = F8;
-            nextop = F8;
             GETGX(x1);
             GETEX(x2, 0);
-            SSE_LOOP_WQ(x3, x4, ADDW(x3, x3, x4));
+            SSE_LOOP_W(x3, x4, ADDW(x3, x3, x4));
             break;
         case 0xFE:
             INST_NAME("PADDD Gx,Ex");
             nextop = F8;
             GETGX(x1);
             GETEX(x2, 0);
-            SSE_LOOP_DQ(x3, x4, ADDW(x3, x3, x4));
+            SSE_LOOP_D(x3, x4, ADDW(x3, x3, x4));
             break;
         default:
             DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 1ecb57c6..f0245994 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -36,13 +36,14 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SET_DFNONE();
     }
 
-    IFX(X_CF) {
+    IFX(X_CF|X_OF) {
         SUBI(s5, s2, rex.w?64:32);
         NEG(s5, s5);
         SRL(s3, s1, s5);
-        ANDI(s5, s3, 1); // LSB
-        BEQZ(s5, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        ANDI(s5, s3, 1); // F_CF
+        IFX(X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
     }
 
     SLL(s1, s1, s2);
@@ -92,12 +93,13 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         }
         return;
     }
-    IFX(X_CF) {
+    IFX(X_CF|X_OF) {
         if (c > 0) {
             SRLI(s3, s1, (rex.w?64:32)-c);
-            ANDI(s5, s3, 1); // LSB
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
+            ANDI(s5, s3, 1); // F_CF
+            IFX(X_CF) {
+                OR(xFlags, xFlags, s5);
+            }
         } else {
             IFX(X_OF) MOV64x(s5, 0);
         }
diff --git a/src/dynarec/rv64/dynarec_rv64_f20f.c b/src/dynarec/rv64/dynarec_rv64_f20f.c
index ef181e97..683917d5 100644
--- a/src/dynarec/rv64/dynarec_rv64_f20f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f20f.c
@@ -125,7 +125,22 @@ uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     DEFAULT;
             }
             break;
-
+        case 0x58:
+            INST_NAME("ADDSD Gx, Ex");
+            nextop = F8;
+            // TODO: fastnan handling
+            GETGXSD(v0);
+            GETEXSD(v1, 0);
+            FADDD(v0, v0, v1);
+            break;
+        case 0x59:
+            INST_NAME("MULSD Gx, Ex");
+            nextop = F8;
+            //TODO: fastnan handling
+            GETGXSD(v0);
+            GETEXSD(v1, 0);
+            FMULD(v0, v0, v1);
+            break;
         case 0x5C:
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index 9c11eec8..f90861bf 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -186,6 +186,13 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FMVS(d0, d1);
             MARK2;
             break;
+        case 0x6F:
+            INST_NAME("MOVDQU Gx,Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            SSE_LOOP_MV_Q(x3);
+            break;
         case 0x7E:
             INST_NAME("MOVQ Gx, Ex");
             nextop = F8;
@@ -202,6 +209,14 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             SD(xZR, xEmu, offsetof(x64emu_t, xmm[gd])+8);
             break;
+        case 0x7F:
+            INST_NAME("MOVDQU Ex,Gx");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            SSE_LOOP_MV_Q2(x3);
+            if(!MODREG) SMWRITE2();
+            break;
         case 0xC2:
             INST_NAME("CMPSS Gx, Ex, Ib");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 565fe018..62aaf79d 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -348,15 +348,15 @@
     SSE_LOOP_D_ITEM(GX1, EX1, F, 2) \
     SSE_LOOP_D_ITEM(GX1, EX1, F, 3)
 
-#define SSE_LOOP_DQ(GX1, EX1, F)    \
-    SSE_LOOP_D_ITEM(GX1, EX1, F, 0) \
-    SSE_LOOP_D_ITEM(GX1, EX1, F, 1)
-
-#define SSE_LOOP_WQ(GX1, EX1, F)    \
+#define SSE_LOOP_W(GX1, EX1, F)    \
     SSE_LOOP_W_ITEM(GX1, EX1, F, 0) \
     SSE_LOOP_W_ITEM(GX1, EX1, F, 1) \
     SSE_LOOP_W_ITEM(GX1, EX1, F, 2) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 3)
+    SSE_LOOP_W_ITEM(GX1, EX1, F, 3) \
+    SSE_LOOP_W_ITEM(GX1, EX1, F, 4) \
+    SSE_LOOP_W_ITEM(GX1, EX1, F, 5) \
+    SSE_LOOP_W_ITEM(GX1, EX1, F, 6) \
+    SSE_LOOP_W_ITEM(GX1, EX1, F, 7)
 
 
 #define SSE_LOOP_DS_ITEM(EX1, F, i)     \