about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-04-11 21:05:37 +0800
committer GitHub <noreply@github.com> 2023-04-11 15:05:37 +0200
commit 75592bec01a0c5c1775561ba68bfd6c1878071ca (patch)
tree 109d4c4a1a103c59ee10d17642fb822456c724eb /src
parent e96b1c810672a2493050a36d32f4961b3158901c (diff)
download box64-75592bec01a0c5c1775561ba68bfd6c1878071ca.tar.gz
box64-75592bec01a0c5c1775561ba68bfd6c1878071ca.zip
[RV64_DYNAREC] Added more opcodes for SV, some fixes & optims also (#686)
* [RV64_DYNAREC] Added F0 09 LOCK OR opcode

* [RV64_DYNAREC] Added 66 0F 59 MULPD opcode

* [RV64_DYNAREC] Added 66 0F 59 MULPD opcode

* [RV64_DYNAREC] Added 28 SUB opcode

* [RV64_DYNAREC] Added 66 0F 73 /6 PSLLQ opcode

* [RV64_DYNAREC] Added 66 0F 60 PUNPCKLBW opcode

* [RV64_DYNAREC] Added 0F 56 ORPS opcode & optims

* [RV64_DYNAREC] Added 0F 50 MOVMSKPS opcode

* [RV64_DYNAREC] Fixed typos (thanks to cosim!)
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c 9
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 32
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_660f.c 80
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f0.c 22
4 files changed, 131 insertions, 12 deletions
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 34c3fedb..97defd9a 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -250,6 +250,15 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             i64 = F32S;
             emit_and32c(dyn, ninst, rex, xRAX, i64, x3, x4);
             break;
+        case 0x28:
+            INST_NAME("SUB Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sub8(dyn, ninst, x1, x2, x4, x5, x6);
+            EBBACK(x5, 0);
+            break;
         case 0x29:
             INST_NAME("SUB Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 9b267061..bb8e7834 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -268,12 +268,38 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
         GOCOND(0x40, "CMOV", "Gd, Ed");
         #undef GO
+        case 0x50:
+            INST_NAME("MOVMSKPS Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEX(x1, 0);
+            XOR(gd, gd, gd);
+            for(int i=0; i<4; ++i) {
+                LWU(x2, wback, fixedaddress+i*4);
+                SRLI(x2, x2, 31-i);
+                if (i>0) ANDI(x2, x2, 1<<i);
+                OR(gd, gd, x2);
+            }
+            break;
         case 0x54:
             INST_NAME("ANDPS Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
-            GETGX(x2);
-            SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
+            gd = ((nextop&0x38)>>3)+(rex.r<<3);
+            if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) {
+                GETGX(x1);
+                GETEX(x2, 0);
+                SSE_LOOP_Q(x3, x4, AND(x3, x3, x4));
+            }
+            break;
+        case 0x56:
+            INST_NAME("ORPS Gx, Ex");
+            nextop = F8;
+            gd = ((nextop&0x38)>>3)+(rex.r<<3);
+            if(!(MODREG && gd==(nextop&7)+(rex.b<<3))) {
+                GETGX(x1);
+                GETEX(x2, 0);
+                SSE_LOOP_Q(x3, x4, OR(x3, x3, x4));
+            }
             break;
         case 0x57:
             INST_NAME("XORPS Gx, Ex");
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index efaa4a14..85ad9b38 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -198,6 +198,26 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_FQ(x3, x4, FADDD(v0, v0, v1));
             break;
+        case 0x59:
+            INST_NAME("MULPD Gx, Ex");
+            nextop = F8;
+            GETEX(x1, 0);
+            GETGX(x2);
+            SSE_LOOP_FQ(x3, x4, {
+                if(!box64_dynarec_fastnan) {
+                    FEQD(x3, v0, v0);
+                    FEQD(x4, v1, v1);
+                }
+                FMULD(v0, v0, v1);
+                if(!box64_dynarec_fastnan) {
+                    AND(x3, x3, x4);
+                    BEQZ(x3, 16);
+                    FEQD(x3, v0, v0);
+                    BNEZ(x3, 8);
+                    FNEGD(v0, v0);
+                }
+            });
+            break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
             nextop = F8;
@@ -206,23 +226,47 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGX(x2);
             SSE_LOOP_FQ(x3, x4, FSUBD(v0, v0, v1));
             break;
+        case 0x60:
+            INST_NAME("PUNPCKLBW Gx,Ex");
+            nextop = F8;
+            GETGX(x2);
+            for(int i=7; i>0; --i) { // 0 is untouched
+                // GX->ub[2 * i] = GX->ub[i];
+                LBU(x3, gback, i);
+                SB(x3, gback, 2*i);
+            }
+            if (MODREG && gd==(nextop&7)+(rex.b<<3)) {
+                for(int i=0; i<8; ++i) {
+                    // GX->ub[2 * i + 1] = GX->ub[2 * i];
+                    LBU(x3, gback, 2*i);
+                    SB(x3, gback, 2*i+1);
+                }
+            } else {
+                GETEX(x1, 0);
+                for(int i=0; i<8; ++i) {
+                    // GX->ub[2 * i + 1] = EX->ub[i];
+                    LBU(x3, wback, fixedaddress+i);
+                    SB(x3, gback, 2*i+1);
+                }
+            }
+            break;
         case 0x61:
             INST_NAME("PUNPCKLWD Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
             GETGX(x2);
             for(int i=3; i>0; --i) {
                 // GX->uw[2 * i] = GX->uw[i];
                 LHU(x3, gback, i*2);
                 SH(x3, gback, 2*i*2);
             }
-            if (MODREG && (ed==gd)) {
+            if (MODREG && gd==(nextop&7)+(rex.b<<3)) {
                 for(int i=0; i<4; ++i) {
                     // GX->uw[2 * i + 1] = GX->uw[2 * i];
                     LHU(x3, gback, 2*i*2);
                     SH(x3, gback, (2*i+1)*2);
                 }
             } else {
+                GETEX(x1, 0);
                 for(int i=0; i<4; ++i) {
                     // GX->uw[2 * i + 1] = EX->uw[i];
                     LHU(x3, wback, fixedaddress+i*2);
@@ -255,7 +299,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x67:
             INST_NAME("PACKUSWB Gx, Ex");
             nextop = F8;
-            GETEX(x1, 0);
             GETGX(x2);
             ADDI(x5, xZR, 0xFF);
             for(int i=0; i<8; ++i) {
@@ -268,11 +311,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 AND(x3, x3, x4);
                 SB(x3, gback, i);
             }
-            if (MODREG && (ed==gd)) {
+            if (MODREG && gd==(nextop&7)+(rex.b<<3)) {
                 // GX->q[1] = GX->q[0];
                 LD(x3, gback, 0*8);
                 SD(x3, gback, 1*8);
             } else {
+                GETEX(x1, 0);
                 for(int i=0; i<8; ++i) {
                     // GX->ub[8+i] = (EX->sw[i]<0)?0:((EX->sw[i]>0xff)?0xff:EX->sw[i]);
                     LH(x3, wback, fixedaddress+i*2);
@@ -288,20 +332,20 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x69:
             INST_NAME("PUNPCKHWD Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
             GETGX(x2);
             for(int i=0; i<4; ++i) {
                 // GX->uw[2 * i] = GX->uw[i + 4];
                 LHU(x3, gback, (i+4)*2);
                 SH(x3, gback, 2*i*2);
             }
-            if (MODREG && (ed==gd)) {
+            if (MODREG && gd==(nextop&7)+(rex.b<<3)) {
                 for(int i=0; i<4; ++i) {
                     // GX->uw[2 * i + 1] = GX->uw[2 * i];
                     LHU(x3, gback, 2*i*2);
                     SH(x3, gback, (2*i+1)*2);
                 }
             } else {
+                GETEX(x1, 0);
                 for(int i=0; i<4; ++i) {
                     // GX->uw[2 * i + 1] = EX->uw[i + 4];
                     LHU(x3, wback, fixedaddress+(i+4)*2);
@@ -332,7 +376,6 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x6B:
             INST_NAME("PACKSSDW Gx,Ex");
             nextop = F8;
-            GETEX(x1, 0);
             GETGX(x2);
             MOV64x(x5, 32768);
             NEG(x6, x5);
@@ -345,11 +388,12 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 MV(x3, x6);
                 SH(x3, gback, i*2);
             }
-            if (MODREG && (ed==gd)) {
+            if (MODREG && gd==(nextop&7)+(rex.b<<3)) {
                 // GX->q[1] = GX->q[0];
                 LD(x3, gback, 0*8);
                 SD(x3, gback, 1*8);
             } else {
+                GETEX(x1, 0);
                 for(int i=0; i<4; ++i) {
                     // GX->sw[4+i] = (EX->sd[i]<-32768)?-32768:((EX->sd[i]>32767)?32767:EX->sd[i]);
                     LW(x3, wback, fixedaddress+i*4);
@@ -491,7 +535,7 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         LD(x4, wback, fixedaddress+8);
                         SRLI(x3, x3, u8);
                         SRLI(x4, x4, u8);
-                        SD(x3, wback, fixedaddress+8);
+                        SD(x3, wback, fixedaddress+0);
                         SD(x4, wback, fixedaddress+8);
                     }
                     break;
@@ -523,6 +567,24 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         }
                     }
                     break;
+                case 6:
+                    INST_NAME("PSLLQ Ex, Ib");
+                    GETEX(x1, 1);
+                    u8 = F8;
+                    if(!u8) break;
+                    if(u8>63) {
+                        // just zero dest
+                        SD(xZR, x1, fixedaddress+0);
+                        SD(xZR, x1, fixedaddress+8);
+                    } else {
+                        LD(x3, wback, fixedaddress+0);
+                        LD(x4, wback, fixedaddress+8);
+                        SLLI(x3, x3, u8);
+                        SLLI(x4, x4, u8);
+                        SD(x3, wback, fixedaddress+0);
+                        SD(x4, wback, fixedaddress+8);
+                    }
+                    break;
                 case 7:
                     INST_NAME("PSLLDQ Ex, Ib");
                     GETEX(x1, 1);
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index 4696c70c..ef46e9eb 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -57,6 +57,28 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     // TODO: Take care of unligned memory access for all the LOCK ones.
     // https://github.com/ptitSeb/box64/pull/604
     switch(opcode) {
+        case 0x09:
+            INST_NAME("LOCK OR Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            SMDMB();
+            if (MODREG) {
+                ed = xRAX+(nextop&7)+(rex.b<<3);
+                emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, LOCK_LOCK, 0, 0);
+                MARKLOCK;
+                LRxw(x1, wback, 1, 1);
+                OR(x1, x1, gd);
+                SCxw(x3, x1, wback, 1, 1);
+                BNEZ_MARKLOCK(x3);
+                IFX(X_ALL|X_PEND) {
+                    emit_or32(dyn, ninst, rex, x1, gd, x3, x4);
+                }
+            }
+            SMDMB();
+            break;
         case 0x0F:
             nextop = F8;
             switch(nextop) {