about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author: Yang Liu <liuyang22@iscas.ac.cn> 2023-04-09 01:22:00 +0800
committer: GitHub <noreply@github.com> 2023-04-08 19:22:00 +0200
commit: d9c30c8942888d609d89df5b8ea29071d2663b46 (patch)
tree: 2e0275ef7f49151fd2d286e8bc98c16d493d3d11 /src
parent: 6f3d70c69f2b3ebd013a36bded8efe61e9dbc463 (diff)
download: box64-d9c30c8942888d609d89df5b8ea29071d2663b46.tar.gz
download: box64-d9c30c8942888d609d89df5b8ea29071d2663b46.zip
[RV64_DYNAREC] Added more opcodes for SV and some fixes (#676)
* [RV64_DYNAREC] Added 66 0F 69 PUNPCKHWD opcode

* [RV64_DYNAREC] Added 66 0F D7 PMOVMSKB opcode

* [RV64_DYNAREC] Added 00 ADD opcode

* [RV64_DYNAREC] Fixed emit_xor32

* [RV64_DYNAREC] Added 66 0F 5C SUBPD opcode

* [RV64_DYNAREC] Added 66 0F EE PMAXSW opcode

* [RV64_DYNAREC] Added 66 0F 11 MOVUPD opcode

* [RV64_DYNAREC] Added 66 0F 10 MOVUPD opcode

* [RV64_DYNAREC] Added 66 0F 29 MOVAPD opcode
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c | 9
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c | 75
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_logic.c | 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h | 43
4 files changed, 114 insertions, 15 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 2454662d..34c3fedb 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -53,6 +53,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(cacheupd);
 
     switch(opcode) {
+        case 0x00:
+            INST_NAME("ADD Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_add8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK(x5, 0);
+            break;
         case 0x01:
             INST_NAME("ADD Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index cbffe1c1..2a19e9ec 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -49,6 +49,21 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(j64);
     
     switch(opcode) {
+        case 0x10:
+            INST_NAME("MOVUPD Gx,Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q(x3);
+            break;
+        case 0x11:
+            INST_NAME("MOVUPD Ex,Gx");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q2(x3);
+            if(!MODREG) SMWRITE2();
+            break;
         case 0x14:
             INST_NAME("UNPCKLPD Gx, Ex");
             nextop = F8;
@@ -92,6 +107,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_MV_Q(x3);
             break;
+        case 0x29:
+            INST_NAME("MOVAPD Ex,Gx");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_MV_Q2(x3);
+            if(!MODREG) SMWRITE2();
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
@@ -155,6 +178,14 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
             break;
+        case 0x5C:
+            INST_NAME("SUBPD Gx, Ex");
+            nextop = F8;
+            //TODO: fastnan handling
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_FQ(x3, x4, FSUBD(v0, v0, v1));
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
@@ -194,6 +225,30 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             LWU(x3, x1, fixedaddress+0*4);
             SW(x3, x2, 1*4);
             break;
+        case 0x69:
+            INST_NAME("PUNPCKHWD Gx,Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            for(int i=0; i<4; ++i) {
+                // GX->uw[2 * i] = GX->uw[i + 4];
+                LHU(x3, gback, (i+4)*2);
+                SH(x3, gback, 2*i*2);
+            }
+            if (MODREG && (ed==gd)) {
+                for(int i=0; i<4; ++i) {
+                    // GX->uw[2 * i + 1] = GX->uw[2 * i];
+                    LHU(x3, gback, 2*i*2);
+                    SH(x3, gback, (2*i+1)*2);
+                }
+            } else {
+                for(int i=0; i<4; ++i) {
+                    // GX->uw[2 * i + 1] = EX->uw[i + 4];
+                    LHU(x3, wback, fixedaddress+(i+4)*2);
+                    SH(x3, gback, (2*i+1)*2);
+                }
+            }
+            break;
         case 0x6C:
             INST_NAME("PUNPCKLQDQ Gx,Ex");
             nextop = F8;
@@ -485,6 +540,19 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xD7:
+            INST_NAME("PMOVMSKB Gd, Ex");
+            nextop = F8;
+            GETEX(x2, 0);
+            GETGD;
+            MV(gd, xZR);
+            for (int i=0; i<16; ++i) {
+                LB(x1, wback, fixedaddress+i);
+                SLT(x3, x1, xZR);
+                if (i > 0) SLLI(x3, x3, i);
+                OR(gd, gd, x3);
+            }
+            break;
         case 0xDB:
             INST_NAME("PAND Gx,Ex");
             nextop = F8;
@@ -506,6 +574,13 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(x2, 0);
             SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
             break;
+        case 0xEE:
+            INST_NAME("PMAXSW Gx,Ex");
+            nextop = F8;
+            GETGX(x1);
+            GETEX(x2, 0);
+            SSE_LOOP_WS(x3, x4, BGE(x3, x4, 8); MV(x3, x4));
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
index 693a15c1..6d17895f 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
@@ -93,6 +93,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     // test sign bit before zeroup.
     IFX(X_SF) {
+        if (!rex.w) SEXT_W(s1, s1);
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
@@ -132,6 +133,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 
     // test sign bit before zeroup.
     IFX(X_SF) {
+        if (!rex.w) SEXT_W(s1, s1);
         BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 62aaf79d..b1947e7f 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -335,12 +335,6 @@
     F;                                  \
     SW(GX1, gback, i*4);
 
-#define SSE_LOOP_W_ITEM(GX1, EX1, F, i) \
-    LHU(GX1, gback, i*2);               \
-    LHU(EX1, wback, fixedaddress+i*2);  \
-    F;                                  \
-    SH(GX1, gback, i*2);
-
 // Loop for SSE opcode that use 32bits value and write to GX.
 #define SSE_LOOP_D(GX1, EX1, F)     \
     SSE_LOOP_D_ITEM(GX1, EX1, F, 0) \
@@ -348,16 +342,21 @@
     SSE_LOOP_D_ITEM(GX1, EX1, F, 2) \
     SSE_LOOP_D_ITEM(GX1, EX1, F, 3)
 
-#define SSE_LOOP_W(GX1, EX1, F)    \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 0) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 1) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 2) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 3) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 4) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 5) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 6) \
-    SSE_LOOP_W_ITEM(GX1, EX1, F, 7)
+#define SSE_LOOP_W(GX1, EX1, F)            \
+    for (int i=0; i<8; ++i) {              \
+        LHU(GX1, gback, i*2);              \
+        LHU(EX1, wback, fixedaddress+i*2); \
+        F;                                 \
+        SH(GX1, gback, i*2);               \
+    }
 
+#define SSE_LOOP_WS(GX1, EX1, F)          \
+    for (int i=0; i<8; ++i) {             \
+        LH(GX1, gback, i*2);              \
+        LH(EX1, wback, fixedaddress+i*2); \
+        F;                                \
+        SH(GX1, gback, i*2);              \
+    }
 
 #define SSE_LOOP_DS_ITEM(EX1, F, i)     \
     LWU(EX1, wback, fixedaddress+i*4);  \
@@ -382,6 +381,20 @@
     SSE_LOOP_Q_ITEM(GX1, EX1, F, 0) \
     SSE_LOOP_Q_ITEM(GX1, EX1, F, 1)
 
+
+#define SSE_LOOP_FQ_ITEM(GX1, EX1, F, i)            \
+    v0 = sse_get_reg_empty(dyn, ninst, x5, GX1, 0); \
+    FLD(v0, gback, i*8);                            \
+    v1 = sse_get_reg_empty(dyn, ninst, x5, EX1, 0); \
+    FLD(v1, wback, fixedaddress+i*8);               \
+    F;                                              \
+    FSD(v0, gback, i*8);
+
+#define SSE_LOOP_FQ(GX1, EX1, F)     \
+    SSE_LOOP_FQ_ITEM(GX1, EX1, F, 0) \
+    SSE_LOOP_FQ_ITEM(GX1, EX1, F, 1)
+
+
 #define SSE_LOOP_MV_Q_ITEM(s, i)      \
     LD(s, wback, fixedaddress+i*8);   \
     SD(s, gback, i*8);